CVPR 2026 Open Access Repository

Findings

Back
Spatial Transcriptomics as Images for Large-Scale Pretraining: Yishun Zhu,

Jiaxin Qi,

Jian Wang,

Yuhua Zheng,

Jianqiang Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yishun and Qi, Jiaxin and Wang, Jian and Zheng, Yuhua and Huang, Jianqiang},
  title     = {Spatial Transcriptomics as Images for Large-Scale Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1191--1200},
}
GEAR: GEometry-Motion Alternating Refinement for Articulated Object Modeling with Gaussian Splatting: Jialin Li,

Bin Fu,

Ruiping Wang,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jialin and Fu, Bin and Wang, Ruiping and Chen, Xilin},
  title     = {{GEAR}: {GEometry}-Motion Alternating Refinement for Articulated Object Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {264--274},
}
Mix-to-Max: Optimizing Data Mixtures for Peak Vision-Language Efficiency: Erwei Zhao,

Haijin Zeng,

Weiwei Xiao,

Shijie Cao,

Qiben Shan,

Shaocong Wu,

Jingyong Su,

Jie Liu; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Erwei and Zeng, Haijin and Xiao, Weiwei and Cao, Shijie and Shan, Qiben and Wu, Shaocong and Su, Jingyong and Liu, Jie},
  title     = {{Mix-to-Max}: Optimizing Data Mixtures for Peak Vision-Language Efficiency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2937--2946},
}
AdaPerceiver: Transformers with Adaptive Width, Depth, and Tokens: Purvish Jajal,

Nicholas John Eliopoulos,

Benjamin Shiue-Hal Chou,

George K Thiruvathukal,

Yung-Hsiang Lu,

James C. Davis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jajal_2026_CVPR,
  author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K and Lu, Yung-Hsiang and Davis, James C.},
  title     = {{AdaPerceiver}: Transformers with Adaptive Width, Depth, and Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2618--2628},
}
Eigen-Value: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach: Youngjun Choi,

Joonseong Kang,

Sungjun Lim,

Kyungwoo Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Youngjun and Kang, Joonseong and Lim, Sungjun and Song, Kyungwoo},
  title     = {{Eigen-Value}: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2387--2397},
}
CoTFly: Making UAVs Think Where to Fly Next Through Visual Chain-of-Thought Reasoning: Meiqi Wang,

Longnyu Xu,

Jun Liu,

Hewu Li,

Han Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meiqi and Xu, Longnyu and Liu, Jun and Li, Hewu and Qiu, Han},
  title     = {{CoTFly}: Making {UAVs} Think Where to Fly Next Through Visual Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1482--1491},
}
AD-R1: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models: Tianyi Yan,

Tao Tang,

Xingtai Gui,

Yongkang Li,

Jiasen Zheng,

Weiyao Huang,

Lingdong Kong,

Wencheng Han,

Xia Zhou,

Xueyang Zhang,

Yifei Zhan,

Kun Zhan,

Cheng-zhong Xu,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Tianyi and Tang, Tao and Gui, Xingtai and Li, Yongkang and Zheng, Jiasen and Huang, Weiyao and Kong, Lingdong and Han, Wencheng and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{AD-R1}: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1085--1095},
}
What Matters for Scalable and Robust Learning in End-to-End Driving Planners?: David Holtz,

Niklas Hanselmann,

Simon Doll,

Marius Cordts,

Bernt Schiele; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Holtz_2026_CVPR,
  author    = {Holtz, David and Hanselmann, Niklas and Doll, Simon and Cordts, Marius and Schiele, Bernt},
  title     = {What Matters for Scalable and Robust Learning in End-to-End Driving Planners?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {931--941},
}
Active Exploration for Sparse Visual Localization: Johanna Lidholm,

Ludvig Dillén,

Zuzana Kukelova,

Torsten Sattler,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Lidholm_2026_CVPR,
  author    = {Lidholm, Johanna and Dill{\'e}n, Ludvig and Kukelova, Zuzana and Sattler, Torsten and Larsson, Viktor},
  title     = {Active Exploration for Sparse Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {338--347},
}
IDSplat: Instance-Decomposed 3D Gaussian Splatting for Driving Scenes: Carl Lindström,

Mahan Rafidashti,

Maryam Fatemi,

Lars Hammarstrand,

Martin R. Oswald,

Lennart Svensson; [pdf] [supp]
[bibtex]
@InProceedings{Lindstrom_2026_CVPR,
  author    = {Lindstr{\"o}m, Carl and Rafidashti, Mahan and Fatemi, Maryam and Hammarstrand, Lars and Oswald, Martin R. and Svensson, Lennart},
  title     = {{IDSplat}: Instance-Decomposed {3D} {Gaussian} Splatting for Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {316--326},
}
MapGPT: A Vision-Language Model for Large-Scale High-Definition Map Generation: Mengxi Wu,

Long Zhou,

Zhixia Li,

Adrian Kwan,

Denis Laprise,

Hengyi Huang,

Xiaqing Wu,

Shuang Wu; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mengxi and Zhou, Long and Li, Zhixia and Kwan, Adrian and Laprise, Denis and Huang, Hengyi and Wu, Xiaqing and Wu, Shuang},
  title     = {{MapGPT}: A Vision-Language Model for Large-Scale High-Definition Map Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {990--999},
}
SPOT: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers: Oded Schlesinger,

Amirhossein Farzam,

J. Matias Di Martino,

Guillermo Sapiro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schlesinger_2026_CVPR,
  author    = {Schlesinger, Oded and Farzam, Amirhossein and Di Martino, J. Matias and Sapiro, Guillermo},
  title     = {{SPOT}: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2335--2345},
}
CPUBone: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities: Moritz Nottebaum,

Matteo Dunnhofer,

Christian Micheloni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nottebaum_2026_CVPR,
  author    = {Nottebaum, Moritz and Dunnhofer, Matteo and Micheloni, Christian},
  title     = {{CPUBone}: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2493--2502},
}
2D Triangle Splatting for Direct Differentiable Mesh Training: Kaifeng Sheng,

Zheng Zhou,

Yingliang Peng,

Qianwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Kaifeng and Zhou, Zheng and Peng, Yingliang and Wang, Qianwei},
  title     = {{2D} Triangle Splatting for Direct Differentiable Mesh Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {285--294},
}
DrawingVQA: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings: Yoonhwa Jung,

Junryu Fu,

Mani Golparvar-Fard; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Yoonhwa and Fu, Junryu and Golparvar-Fard, Mani},
  title     = {{DrawingVQA}: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2121--2130},
}
Debiased One-Shot NAS Via Density-Aware Sampling: Mehraveh Javan Roshtkhari,

Matthew Toews,

Marco Pedersoli; [pdf] [supp]
[bibtex]
@InProceedings{Roshtkhari_2026_CVPR,
  author    = {Roshtkhari, Mehraveh Javan and Toews, Matthew and Pedersoli, Marco},
  title     = {Debiased One-Shot {NAS} Via Density-Aware Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2357--2366},
}
SLAD : Shared LoRA Adapters for Task Specific Distillation: Reda Bensaid,

Yassir Bendou,

Vincent Gripon,

François Leduc-Primeau; [pdf] [supp]
[bibtex]
@InProceedings{Bensaid_2026_CVPR,
  author    = {Bensaid, Reda and Bendou, Yassir and Gripon, Vincent and Leduc-Primeau, Fran{\c{c}}ois},
  title     = {{SLAD} : Shared {LoRA} Adapters for Task Specific Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2968--2977},
}
OnlineX: Unified Online 3D Reconstruction and Understanding with Active-to-Stable State Evolution: Chong Xia,

Fangfu Liu,

Yule Wang,

Yize Pang,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Liu, Fangfu and Wang, Yule and Pang, Yize and Duan, Yueqi},
  title     = {{OnlineX}: Unified Online {3D} Reconstruction and Understanding with Active-to-Stable State Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {66--76},
}
AndroidLong: LLM-based Android Agents Struggle with Long Looping Tasks: Xinghan Liu,

Xiao Liu,

Yifan Xu,

Jiaqi Fu,

Jiayu Huang,

Yixuan Liu,

Yuxiao Dong,

Jie Tang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinghan and Liu, Xiao and Xu, Yifan and Fu, Jiaqi and Huang, Jiayu and Liu, Yixuan and Dong, Yuxiao and Tang, Jie},
  title     = {{AndroidLong}: {LLM}-based {Android} Agents Struggle with Long Looping Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1700--1710},
}
CTFS : Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels: Ping Guo,

Chengzhou Li,

Guanchen Meng,

Qi Jia,

Jinyuan Liu,

Zhu Liu,

Yu Liu,

Zhongxuan Luo,

Xin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ping and Li, Chengzhou and Meng, Guanchen and Jia, Qi and Liu, Jinyuan and Liu, Zhu and Liu, Yu and Luo, Zhongxuan and Fan, Xin},
  title     = {{CTFS} : Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1660--1669},
}
Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models: Shan Zhao,

Zhao Yang,

Tianwei Yan,

Yusong Gong,

Qian Wan,

Shizhao Chen,

Shezheng Song,

Chengyu Wang,

Meng Wang; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Shan and Yang, Zhao and Yan, Tianwei and Gong, Yusong and Wan, Qian and Chen, Shizhao and Song, Shezheng and Wang, Chengyu and Wang, Meng},
  title     = {Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2304--2313},
}
KnowMTP: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving: Rufan Bai,

Tianyi Xue,

Tiantian Zhou,

Weiwei Wu,

Changle Li,

Yuhuan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Rufan and Xue, Tianyi and Zhou, Tiantian and Wu, Weiwei and Li, Changle and Lu, Yuhuan},
  title     = {{KnowMTP}: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {980--989},
}
GRADE: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution: Zehong Ke,

Zhiyuan Liu,

Yuning Wang,

Jinhao Li,

Junkai Jiang,

Yanbo Jiang,

Zhenhua Xu,

Jianqiang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Zehong and Liu, Zhiyuan and Wang, Yuning and Li, Jinhao and Jiang, Junkai and Jiang, Yanbo and Xu, Zhenhua and Wang, Jianqiang},
  title     = {{GRADE}: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1029--1038},
}
A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models: Duo Li,

Zuhao Yang,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Duo and Yang, Zuhao and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2823--2833},
}
PDF-GS: Progressive Distractor Filtering for Robust 3D Gaussian Splatting: Kangmin Seo,

MinKyu Lee,

Tae-Young Kim,

ByeongCheol Lee,

JoonSeoung An,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Kangmin and Lee, MinKyu and Kim, Tae-Young and Lee, ByeongCheol and An, JoonSeoung and Heo, Jae-Pil},
  title     = {{PDF-GS}: Progressive Distractor Filtering for Robust {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {468--477},
}
FineCog-Nav: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal UAV Navigation: Dian Shao,

Zhengzheng Xu,

Peiyang Wang,

Like Liu,

Yule Wang,

Jieqi Shi,

Jing Huo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Dian and Xu, Zhengzheng and Wang, Peiyang and Liu, Like and Wang, Yule and Shi, Jieqi and Huo, Jing},
  title     = {{FineCog-Nav}: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal {UAV} Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1325--1334},
}
Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks: Yu Yan,

Sheng Sun,

Shengjia Cheng,

Teli Liu,

Mingfeng Li,

Min Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yu and Sun, Sheng and Cheng, Shengjia and Liu, Teli and Li, Mingfeng and Liu, Min},
  title     = {Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {837--846},
}
Plug-and-Think: Structured Reasoning for Vision-Language-Action Models: Kaikai Wei,

Di wen,

Xinhai Li,

Senwei Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Kaikai and wen, Di and Li, Xinhai and Xiang, Senwei},
  title     = {{Plug-and-Think}: Structured Reasoning for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3136--3145},
}
Step-CoT: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering: Lin Fan,

Yafei Ou,

Zhipeng Deng,

Pengyu Dai,

Chongxian Hou,

Jiale Yan,

Yaqian Li,

Kaiwen Long,

Xun Gong,

Masayuki Ikebe,

Yefeng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Lin and Ou, Yafei and Deng, Zhipeng and Dai, Pengyu and Hou, Chongxian and Yan, Jiale and Li, Yaqian and Long, Kaiwen and Gong, Xun and Ikebe, Masayuki and Zheng, Yefeng},
  title     = {{Step-CoT}: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2049--2059},
}
Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings: Fatemeh Akbarian,

Anahita Baninajjar,

Yingyi Zhang,

Ananth Balashankar,

Amir Aminifar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akbarian_2026_CVPR,
  author    = {Akbarian, Fatemeh and Baninajjar, Anahita and Zhang, Yingyi and Balashankar, Ananth and Aminifar, Amir},
  title     = {Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {748--757},
}
LTGS: Long-Term Gaussian Scene Chronology From Sparse View Updates: Minkwan Kim,

Seungmin Lee,

Junho Kim,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minkwan and Lee, Seungmin and Kim, Junho and Kim, Young Min},
  title     = {{LTGS}: Long-Term {Gaussian} Scene Chronology From Sparse View Updates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {488--497},
}
VideoMatGen: PBR Materials through Joint Generative Modeling: Jon Hasselgren,

Milos Hasan,

Zheng Zeng,

Jacob Munkberg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasselgren_2026_CVPR,
  author    = {Hasselgren, Jon and Hasan, Milos and Zeng, Zheng and Munkberg, Jacob},
  title     = {{VideoMatGen}: {PBR} Materials through Joint Generative Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2440--2450},
}
Environmental Understanding Vision-language Model for Embodied Agent: Jinsik Bang,

Jaeyeon Bae,

Donggyu Lee,

Siyeol Jung,

Taehwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2026_CVPR,
  author    = {Bang, Jinsik and Bae, Jaeyeon and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
  title     = {Environmental Understanding Vision-language Model for Embodied Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3092--3102},
}
Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints: Jungkon Kim,

Cheolseung Jung,

Jong-Min Choi,

Juseong Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jungkon and Jung, Cheolseung and Choi, Jong-Min and Lee, Juseong},
  title     = {Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {847--856},
}
Learning Vision-Language-Action World Models for Autonomous Driving: Guoqing Wang,

Pin Tang,

Xiangxuan Ren,

Guodongfang Zhao,

Bailan Feng,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guoqing and Tang, Pin and Ren, Xiangxuan and Zhao, Guodongfang and Feng, Bailan and Ma, Chao},
  title     = {Learning Vision-Language-Action World Models for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1073--1084},
}
Exploring the best way for UAV visual localization under Low-altitude Multi-view Observation Condition: a Benchmark: Yibin Ye,

Xichao Teng,

Shuo Chen,

Leqi Liu,

Kun Wang,

Xiaokai Song,

Zhang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yibin and Teng, Xichao and Chen, Shuo and Liu, Leqi and Wang, Kun and Song, Xiaokai and Li, Zhang},
  title     = {Exploring the best way for {UAV} visual localization under Low-altitude Multi-view Observation Condition: a Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1731--1741},
}
BMD-45: A Large-Scale CCTV Vehicle Detection Dataset for Urban Traffic in Developing Cities: Akash Sharma,

Chinmay Mhatre,

Sankalp Gawali,

Ruthvik Bokkasam,

Brij Sharma,

Vishwajeet Pattanaik,

Punit Rathore,

Raghu Krishnapuram,

Vijay Gopal Kovvali,

Anirban Chakraborty,

Yogesh Simmhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Akash and Mhatre, Chinmay and Gawali, Sankalp and Bokkasam, Ruthvik and Sharma, Brij and Pattanaik, Vishwajeet and Rathore, Punit and Krishnapuram, Raghu and Kovvali, Vijay Gopal and Chakraborty, Anirban and Simmhan, Yogesh},
  title     = {{BMD-45}: A Large-Scale {CCTV} Vehicle Detection Dataset for Urban Traffic in Developing Cities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2240--2249},
}
Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention: Wenhu Zhang,

Yiming Wu,

Huanyu Wang,

YaoYang Liu,

Huanzhang Dou,

Senqiao Yang,

Sitong Wu,

Hanbin Zhao,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenhu and Wu, Yiming and Wang, Huanyu and Liu, YaoYang and Dou, Huanzhang and Yang, Senqiao and Wu, Sitong and Zhao, Hanbin and Jia, Jiaya},
  title     = {Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2852--2862},
}
Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training: Yanyun Wang,

Qingqing Ye,

Li Liu,

Zi Liang,

Haibo Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yanyun and Ye, Qingqing and Liu, Li and Liang, Zi and Hu, Haibo},
  title     = {Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {768--778},
}
Memorization in 3D Shape Generation: An Empirical Study: Shu Pu,

Boya Zeng,

Kaichen Zhou,

Mengyu Wang,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2026_CVPR,
  author    = {Pu, Shu and Zeng, Boya and Zhou, Kaichen and Wang, Mengyu and Liu, Zhuang},
  title     = {Memorization in {3D} Shape Generation: An Empirical Study},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1828--1838},
}
Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings: Luca Parolari,

Nicla Faccioli,

Lamberto Ballan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parolari_2026_CVPR,
  author    = {Parolari, Luca and Faccioli, Nicla and Ballan, Lamberto},
  title     = {Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1742--1751},
}
DaMN: Deleting and Migrating Normalization Layers from Transformers: Alexey Ryabykin,

Irina Zhelavskaya,

Egor Shvetsov,

Alexey Rukhovich,

Nikita Okhotnikov,

Artem Khrapov,

Evgeny Burnaev,

Vladimir Mikhailovich Kryzhanovskiy; [pdf] [supp]
[bibtex]
@InProceedings{Ryabykin_2026_CVPR,
  author    = {Ryabykin, Alexey and Zhelavskaya, Irina and Shvetsov, Egor and Rukhovich, Alexey and Okhotnikov, Nikita and Khrapov, Artem and Burnaev, Evgeny and Kryzhanovskiy, Vladimir Mikhailovich},
  title     = {{DaMN}: Deleting and Migrating Normalization Layers from Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2883--2892},
}
Do LLMs and VLMs Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer: Chenhang Cui,

An Zhang,

Yuxin Chen,

Gelei Deng,

Jingnan Zheng,

Zhenkai Liang,

Xiang Wang,

Tat-Seng Chua; [pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Chenhang and Zhang, An and Chen, Yuxin and Deng, Gelei and Zheng, Jingnan and Liang, Zhenkai and Wang, Xiang and Chua, Tat-Seng},
  title     = {Do {LLMs} and {VLMs} Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2346--2356},
}
Re-Depth Anything: Test-Time Depth Refinement via Self-Supervised Re-lighting: Ananta R. Bhattarai,

Helge Rhodin; [pdf] [supp]
[bibtex]
@InProceedings{Bhattarai_2026_CVPR,
  author    = {Bhattarai, Ananta R. and Rhodin, Helge},
  title     = {{Re-Depth} {Anything}: Test-Time Depth Refinement via Self-Supervised Re-lighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {602--612},
}
TransKV: A Data-Driven Pruning Method for Large Foundation Models: Guangning Xu,

Fanxu Meng,

Ruijie Zhou,

Michael K Ng,

Wenjie Pei,

Muhan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangning and Meng, Fanxu and Zhou, Ruijie and Ng, Michael K and Pei, Wenjie and Zhang, Muhan},
  title     = {{TransKV}: A Data-Driven Pruning Method for Large Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2451--2461},
}
MaMe: Matrix-Based Token Merging: Simin Huo,

Ning Li; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Simin and Li, Ning},
  title     = {{MaMe}: Matrix-Based Token Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2863--2872},
}
BadRSSD: Backdoor Attacks on Regularized Self-Supervised Diffusion Models: Jiayao Wang,

Yiping Zhang,

Mohammad Maruf Hasan,

Xiaoying Lei,

Jiale Zhang,

Junwu Zhu,

Qilin Wu,

Dongfang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayao and Zhang, Yiping and Hasan, Mohammad Maruf and Lei, Xiaoying and Zhang, Jiale and Zhu, Junwu and Wu, Qilin and Zhao, Dongfang},
  title     = {{BadRSSD}: Backdoor Attacks on Regularized Self-Supervised Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {705--715},
}
Splatwizard: A Benchmark Toolkit for 3D Gaussian Splatting Compression: Xiang Liu,

Yimin Zhou,

Jinxiang Wang,

Yujun Huang,

Shuzhao Xie,

Shiyu Qin,

Mingyao Hong,

Jiawei Li,

Yaowei Wang,

Zhi Wang,

Shu-Tao Xia,

Bin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiang and Zhou, Yimin and Wang, Jinxiang and Huang, Yujun and Xie, Shuzhao and Qin, Shiyu and Hong, Mingyao and Li, Jiawei and Wang, Yaowei and Wang, Zhi and Xia, Shu-Tao and Chen, Bin},
  title     = {Splatwizard: A Benchmark Toolkit for {3D} {Gaussian} Splatting Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2261--2271},
}
See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models: Le Thien Phuc Nguyen,

Zhuoran Yu,

Samuel Low Yu Hang,

Subin An,

Jeongik Lee,

Yohan Ban,

SeungEun Chung,

Thanh-Huy Nguyen,

JuWan Maeng,

Soochahn Lee,

Yong Jae Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Le Thien Phuc and Yu, Zhuoran and Hang, Samuel Low Yu and An, Subin and Lee, Jeongik and Ban, Yohan and Chung, SeungEun and Nguyen, Thanh-Huy and Maeng, JuWan and Lee, Soochahn and Lee, Yong Jae},
  title     = {See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2272--2283},
}
RACE-6D: Real-time Accurate Coarse-to-finE Object 6D Pose Transformer: Yoonwoo Ha,

Hyungpil Moon; [pdf] [supp]
[bibtex]
@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Yoonwoo and Moon, Hyungpil},
  title     = {{RACE-6D}: Real-time Accurate Coarse-to-{finE} Object {6D} Pose Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1525--1534},
}
GM-Skip: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models: Lianming Huang,

Haibo Hu,

Qiao Li,

Xin He,

Nan Guan,

Chun Jason Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Lianming and Hu, Haibo and Li, Qiao and He, Xin and Guan, Nan and Xue, Chun Jason},
  title     = {{GM-Skip}: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2834--2843},
}
Res2SPDNet: Multi-Granularity SPD Matrix Residual Learning for Signal Classification: Shenghui Yue,

Rui Wang,

Tianyang Xu,

Tao Zhou,

Xiao-Jun Wu,

Josef Kittler; [pdf]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Shenghui and Wang, Rui and Xu, Tianyang and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef},
  title     = {{Res2SPDNet}: Multi-Granularity {SPD} Matrix Residual Learning for Signal Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2639--2648},
}
What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters: Shaobo Liu,

Haobo Xiong,

Kai Liu,

Yuna Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shaobo and Xiong, Haobo and Liu, Kai and Lin, Yuna},
  title     = {What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2813--2822}
}
RoboScape-R: Unified Reward-Observation World Models for Generalizable Robotics Training via RL: Yinzhou Tang,

Yu Shang,

Yinuo Chen,

Bingwen Wei,

Xin Zhang,

Shu'ang Yu,

Liangzhi Shi,

Chao Yu,

Chen Gao,

Wei Wu,

Yong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yinzhou and Shang, Yu and Chen, Yinuo and Wei, Bingwen and Zhang, Xin and Yu, Shu'ang and Shi, Liangzhi and Yu, Chao and Gao, Chen and Wu, Wei and Li, Yong},
  title     = {{RoboScape-R}: Unified Reward-Observation World Models for Generalizable Robotics Training via {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1472--1481}
}
IRL-VLA: Vision-Language-Action Training via Reward World Model: Anqing Jiang,

Gao Yu,

Heng Yuwen,

Yiru Wang,

Wang Shuo,

Jiang Hao,

Sun Hao; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Anqing and Yu, Gao and Yuwen, Heng and Wang, Yiru and Shuo, Wang and Hao, Jiang and Hao, Sun},
  title     = {{IRL-VLA}: Vision-Language-Action Training via Reward World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {970--979}
}
Softmax-GS: Generalized Gaussians Learning When to Blend or Bound: Chen Ziwen,

Peng Wang,

Hao Tan,

Zexiang Xu,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Wang, Peng and Tan, Hao and Xu, Zexiang and Fuxin, Li},
  title     = {{Softmax-GS}: Generalized {Gaussians} Learning When to Blend or Bound},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {508--517}
}
On the Feasibility and Opportunity of Autoregressive 3D Object Detection: Zanming Huang,

Jinsu Yoo,

Sooyoung Jeon,

Zhenzhen Liu,

Mark Campbell,

Kilian Q Weinberger,

Bharath Hariharan,

Wei-Lun Chao,

Katie Z Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zanming and Yoo, Jinsu and Jeon, Sooyoung and Liu, Zhenzhen and Campbell, Mark and Weinberger, Kilian Q and Hariharan, Bharath and Chao, Wei-Lun and Luo, Katie Z},
  title     = {On the Feasibility and Opportunity of Autoregressive {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1170--1179}
}
LUMINA: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction: Jiazhou Lin,

Zhongyi Liu,

Ying Shi,

Zhichun Zhao,

Zhuoyu Wang,

Yuhang Zhou,

Huanling Hu,

Guangnan Ye,

Mengtian Li,

Lei Guo; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiazhou and Liu, Zhongyi and Shi, Ying and Zhao, Zhichun and Wang, Zhuoyu and Zhou, Yuhang and Hu, Huanling and Ye, Guangnan and Li, Mengtian and Guo, Lei},
  title     = {{LUMINA}: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1629--1639}
}
A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing: Maria Stoica,

Abdelrahman Hekal,

Alessio Lomuscio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stoica_2026_CVPR,
  author    = {Stoica, Maria and Hekal, Abdelrahman and Lomuscio, Alessio},
  title     = {A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {685--694}
}
MFI-ResNet: Efficient ResNet Architecture Optimization via MeanFlow Compression and Selective Incubation: Nuolin Sun,

Linyuan Wang,

Haonan Wei,

Lei Li,

Bin Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Nuolin and Wang, Linyuan and Wei, Haonan and Li, Lei and Yan, Bin},
  title     = {{MFI-ResNet}: Efficient {ResNet} Architecture Optimization via {MeanFlow} Compression and Selective Incubation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2691--2699}
}
Speed3R: Sparse Feed-forward 3D Reconstruction Models: Weining Ren,

Xiao Tan,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Weining and Tan, Xiao and Han, Kai},
  title     = {{Speed3R}: Sparse Feed-forward {3D} Reconstruction Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {119--128}
}
Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization PatchGAN: C. Zaghetto,

A. Purim,

W. Oliveira,

J. R. Ribeiro,

H. Nolla,

F. Santos,

M. Chang,

R. H. Vareto; [pdf] [supp]
[bibtex]
@InProceedings{Zaghetto_2026_CVPR,
  author    = {Zaghetto, C. and Purim, A. and Oliveira, W. and Ribeiro, J. R. and Nolla, H. and Santos, F. and Chang, M. and Vareto, R. H.},
  title     = {Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization {PatchGAN}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1221--1229}
}
Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection: Hanvitha Saraswathi Mukkamala,

Arun K Pujari; [pdf] [supp]
[bibtex]
@InProceedings{Mukkamala_2026_CVPR,
  author    = {Mukkamala, Hanvitha Saraswathi and Pujari, Arun K},
  title     = {Co-Adaptive Graph Learning Through Coupled Spectral Refinement for {3D} Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1680--1689}
}
GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction: Minsol Kim,

Usman Ali; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minsol and Ali, Usman},
  title     = {{GauSDF}: Signed Distance Embedded {Gaussian} Surfels for {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {33--42}
}
AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents: Yue Cao,

Yingyao Wang,

Pi Bu,

Jingxuan Xing,

Wei Jiang,

Zekun Zhu,

Junpeng Ma,

Sashuai Zhou,

Tong Lu,

Jun Song,

Yu Cheng,

Yuning Jiang,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yue and Wang, Yingyao and Bu, Pi and Xing, Jingxuan and Jiang, Wei and Zhu, Zekun and Ma, Junpeng and Zhou, Sashuai and Lu, Tong and Song, Jun and Cheng, Yu and Jiang, Yuning and Zheng, Bo},
  title     = {{AndroidLens}: Long-latency Evaluation with Nested Sub-targets for {Android} {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1903--1912}
}
C^2T: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination: Yuyang Chen,

Kaiyan Zhao,

Yiming Wang,

Ming Yang,

Bin Rao,

Zhenning Li; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuyang and Zhao, Kaiyan and Wang, Yiming and Yang, Ming and Rao, Bin and Li, Zhenning},
  title     = {{C$^{2}$T}: Captioning-Structure and {LLM}-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1128--1137}
}
Generative Event Pretraining with Foundation Model Alignment: Jianwen Cao,

Jiaxu Xing,

Nico Messikommer,

Davide Scaramuzza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jianwen and Xing, Jiaxu and Messikommer, Nico and Scaramuzza, Davide},
  title     = {Generative Event Pretraining with Foundation Model Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3189--3199}
}
Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models: Gracjan Goral,

Alicja Ziarko,

Piotr Milos,

Michal Nauman,

Maciej Wolczyk,

Michal Kosinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goral_2026_CVPR,
  author    = {Goral, Gracjan and Ziarko, Alicja and Milos, Piotr and Nauman, Michal and Wolczyk, Maciej and Kosinski, Michal},
  title     = {Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1721--1730}
}
BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching: Zhien Dai,

Zhaohui Tang,

Hu Zhang,

Mingjun Pan,

Jin Luo,

Yongfang Xie; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Zhien and Tang, Zhaohui and Zhang, Hu and Pan, Mingjun and Luo, Jin and Xie, Yongfang},
  title     = {{BLMT-Stereo}: Breaking the Local Minima Trap of Iterative Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1--11}
}
FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting: Tianhao Xie,

Linlian Jiang,

Xinxin Zuo,

Yang Wang,

Tiberiu Popa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tianhao and Jiang, Linlian and Zuo, Xinxin and Wang, Yang and Popa, Tiberiu},
  title     = {{FACT-GS}: Frequency-Aligned Complexity-Aware Texture Reparameterization for {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {202--212}
}
Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification: Lulu Fang,

Jiaxiang Qin,

Ruiheng Yan,

Ning Pan,

Haihua Liu,

Xinxin Chen; [pdf]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Lulu and Qin, Jiaxiang and Yan, Ruiheng and Pan, Ning and Liu, Haihua and Chen, Xinxin},
  title     = {Deep Feedback {ConvNets} by Embedding the Working Memory Module for Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2564--2574}
}
HumanOrbit: 3D Human Reconstruction as 360° Orbit Generation: Keito Suzuki,

Kunyao Chen,

Lei Wang,

Bang Du,

Runfa Blark Li,

Peng Liu,

Ning Bi,

Truong Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Suzuki_2026_CVPR,
  author    = {Suzuki, Keito and Chen, Kunyao and Wang, Lei and Du, Bang and Li, Runfa Blark and Liu, Peng and Bi, Ning and Nguyen, Truong},
  title     = {{HumanOrbit}: {3D} Human Reconstruction as {360$^{\circ}$} Orbit Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {624--634}
}
Channel Correlation Loss for Binary Neural Networks: Xindi Zuo,

Wei Zhang,

Hai Yu,

Zhiliang Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Xindi and Zhang, Wei and Yu, Hai and Zhu, Zhiliang},
  title     = {Channel Correlation Loss for Binary Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2575--2584}
}
JACoP: Joint Alignment for Compliant Multi-Agent Prediction: Qingze Tony Liu,

Alen Mrdovic,

Danrui Li,

Mathew Schwartz,

Sejong Yoon,

Mubbasir Kapadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Qingze Tony and Mrdovic, Alen and Li, Danrui and Schwartz, Mathew and Yoon, Sejong and Kapadia, Mubbasir},
  title     = {{JACoP}: Joint Alignment for Compliant Multi-Agent Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {910--919}
}
Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models: Songlong Xing,

Weijie Wang,

Zhengyu Zhao,

Jindong Gu,

Philip Torr,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Songlong and Wang, Weijie and Zhao, Zhengyu and Gu, Jindong and Torr, Philip and Sebe, Nicu},
  title     = {Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {737--747}
}
Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training: Hu Lin,

Chengjiang Long,

Jiqing Zhang,

Chuanlu Jiang,

Huilin Ge,

Erwei Yin,

Baocai Yin,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Hu and Long, Chengjiang and Zhang, Jiqing and Jiang, Chuanlu and Ge, Huilin and Yin, Erwei and Yin, Baocai and Yang, Xin},
  title     = {Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1335--1345}
}
Revisiting Articulated Parts Perception in Robot Manipulation: Xiaoqian Wu,

Yejie Guo,

Xiaoyang Chen,

Lixin Yang,

Cewu Lu,

Yong-Lu Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaoqian and Guo, Yejie and Chen, Xiaoyang and Yang, Lixin and Lu, Cewu and Li, Yong-Lu},
  title     = {Revisiting Articulated Parts Perception in Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1368--1377}
}
Vision Language Models are Confused Tourists: Patrick Amadeus Irawan,

Ikhlasul Akmal Hanif,

Muhammad Dehan Al Kautsar,

Genta Indra Winata,

Fajri Koto,

Alham Fikri Aji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Irawan_2026_CVPR,
  author    = {Irawan, Patrick Amadeus and Hanif, Ikhlasul Akmal and Al Kautsar, Muhammad Dehan and Winata, Genta Indra and Koto, Fajri and Aji, Alham Fikri},
  title     = {Vision Language Models are Confused Tourists},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1763--1773}
}
Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs: Tianle Chen,

Chaitanya Chakka,

Arjun Reddy Akula,

Xavier Thomas,

Deepti Ghadiyaram; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tianle and Chakka, Chaitanya and Akula, Arjun Reddy and Thomas, Xavier and Ghadiyaram, Deepti},
  title     = {Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2142--2151}
}
Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling: Camille Biscarrat,

Michaël Gharbi,

Rahul Goel,

Jonathan Ragan-Kelley,

Frédo Durand,

Tzu-Mao Li; [pdf] [supp]
[bibtex]
@InProceedings{Biscarrat_2026_CVPR,
  author    = {Biscarrat, Camille and Gharbi, Micha{\"e}l and Goel, Rahul and Ragan-Kelley, Jonathan and Durand, Fr{\'e}do and Li, Tzu-Mao},
  title     = {Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1283--1293}
}
From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity: Haoming Liu,

Jinnuo Liu,

Yanhao Li,

Liuyang Bai,

Yunkai Ji,

Yuanhe Guo,

Shenji Wan,

Hongyi Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haoming and Liu, Jinnuo and Li, Yanhao and Bai, Liuyang and Ji, Yunkai and Guo, Yuanhe and Wan, Shenji and Wen, Hongyi},
  title     = {From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2649--2658}
}
HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet: Badri N Patro,

Vijay S Agneeswaran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patro_2026_CVPR,
  author    = {Patro, Badri N and Agneeswaran, Vijay S},
  title     = {{HAMSA}: Scanning-Free Vision State Space Models via {SpectralPulseNet}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2408--2418}
}
Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering: Xiaobin Deng,

Changyu Diao,

Min Li,

Ruohan Yu,

Duanqing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiaobin and Diao, Changyu and Li, Min and Yu, Ruohan and Xu, Duanqing},
  title     = {Improving Densification in {3D} {Gaussian} Splatting for High-Fidelity Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {223--232}
}
Rich Feature Learning via Diversification: Xi Leng,

Yongqiang Chen,

Xiaoying Tang,

Yatao Bian; [pdf] [supp]
[bibtex]
@InProceedings{Leng_2026_CVPR,
  author    = {Leng, Xi and Chen, Yongqiang and Tang, Xiaoying and Bian, Yatao},
  title     = {Rich Feature Learning via Diversification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2462--2472}
}
SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting: Yonghan Lee,

Tsung-Wei Huang,

Shiv Gehlot,

Jaehoon Choi,

Guan-Ming Su,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yonghan and Huang, Tsung-Wei and Gehlot, Shiv and Choi, Jaehoon and Su, Guan-Ming and Manocha, Dinesh},
  title     = {{SyncTrack4D}: Cross-Video Motion Alignment and Video Synchronization with Multi-Video {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {77--87}
}
When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization: Md Zarif Hossain,

Awal Ahmed Fime,

Ahmed Imteaj; [pdf] [supp]
[bibtex]
@InProceedings{Hossain_2026_CVPR,
  author    = {Hossain, Md Zarif and Fime, Awal Ahmed and Imteaj, Ahmed},
  title     = {When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {758--767}
}
Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data: Yizhao Xu,

Hongyuan Zhu,

Caiyun Liu,

Tianfu Wang,

Keyu Chen,

Sicheng Xu,

Jiaolong Yang,

Nicholas Jing Yuan,

Qi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yizhao and Zhu, Hongyuan and Liu, Caiyun and Wang, Tianfu and Chen, Keyu and Xu, Sicheng and Yang, Jiaolong and Yuan, Nicholas Jing and Zhang, Qi},
  title     = {Beyond Voxel {3D} Editing: Learning from {3D} Masks and Self-Constructed Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {635--646}
}
BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models: Ba Luan Dang,

Vu Tuan Truong,

Long Bao Le; [pdf] [supp]
[bibtex]
@InProceedings{Dang_2026_CVPR,
  author    = {Dang, Ba Luan and Truong, Vu Tuan and Le, Long Bao},
  title     = {{BadVLM}: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {726--736}
}
PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer: David Picard,

Nicolas Dufour,

Lucas Degeorge,

Arijit Ghosh,

Davide Allegro,

Tom Ravaud,

Yohann Perron,

Corentin Sautier,

Zeynep Sonat Baltaci,

Fei Meng,

Syrine Kalleli,

Marta López-Rauhut,

Thibaut Loiseau,

Ségolène Albouy,

Raphael Baena,

Elliot Vincent,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Picard_2026_CVPR,
  author    = {Picard, David and Dufour, Nicolas and Degeorge, Lucas and Ghosh, Arijit and Allegro, Davide and Ravaud, Tom and Perron, Yohann and Sautier, Corentin and Baltaci, Zeynep Sonat and Meng, Fei and Kalleli, Syrine and L{\'o}pez-Rauhut, Marta and Loiseau, Thibaut and Albouy, S{\'e}gol{\`e}ne and Baena, Raphael and Vincent, Elliot and Landrieu, Loic},
  title     = {{PoM}: A Linear-Time Replacement for Attention with the Polynomial Mixer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2544--2553}
}
D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models: Wenlun Zhang,

Yunshan Zhong,

Zihao Ding,

Xinyu Li,

Kentaro Yoshioka; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenlun and Zhong, Yunshan and Ding, Zihao and Li, Xinyu and Yoshioka, Kentaro},
  title     = {{D4C}: Data-Free Quantization for Contrastive Language-Image Pre-Training Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2978--2987}
}
CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios: Hangyu Li,

Bofeng Cao,

Zhaohui Liang,

Wuzhen Li,

Juyoung Oh,

Yuxuan Chen,

Shixiao Liang,

Hang Zhou,

Chengyuan Ma,

Jiaxi Liu,

Zheng Li,

Peng Zhang,

Keke Long,

Maolin Liu,

Jackson Jiang,

Chunlei Yu,

Shengxiang Liu,

Hongkai Yu,

Xiaopeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hangyu and Cao, Bofeng and Liang, Zhaohui and Li, Wuzhen and Oh, Juyoung and Chen, Yuxuan and Liang, Shixiao and Zhou, Hang and Ma, Chengyuan and Liu, Jiaxi and Li, Zheng and Zhang, Peng and Long, Keke and Liu, Maolin and Jiang, Jackson and Yu, Chunlei and Liu, Shengxiang and Yu, Hongkai and Li, Xiaopeng},
  title     = {{CATS-V2V}: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2294--2303}
}
RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer: Liu Liu,

Xiaofeng Wang,

Guosheng Zhao,

Keyu Li,

Wenkang Qin,

Jiagang Zhu,

Jiaxiong Qiu,

Guan Huang,

Zhizhong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Liu and Wang, Xiaofeng and Zhao, Guosheng and Li, Keyu and Qin, Wenkang and Zhu, Jiagang and Qiu, Jiaxiong and Huang, Guan and Su, Zhizhong},
  title     = {{RoboTransfer}: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1410--1420}
}
SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360° Video Saliency Prediction: Kao Zhang,

Tao Song,

Zhihua Hu,

Ming Li,

Xin Ding; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kao and Song, Tao and Hu, Zhihua and Li, Ming and Ding, Xin},
  title     = {{SGST-Transformer}: A Spherical Geometry-Aware Spatio-Temporal Transformer for {360$^{\circ}$} Video Saliency Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2596--2605}
}
VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning: Bo Pang,

Chenxi Xu,

Jierui Ren,

Guoping Wang,

Sheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2026_CVPR,
  author    = {Pang, Bo and Xu, Chenxi and Ren, Jierui and Wang, Guoping and Li, Sheng},
  title     = {{VibraVerse}: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2028--2037}
}
RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies: Guillermo Garcia-Cobo,

Maximilian Igl,

Peter Karkus,

Zhejun Zhang,

Michael Watson,

Yuxiao Chen,

Boris Ivanovic,

Marco Pavone; [pdf] [supp]
[bibtex]
@InProceedings{Garcia-Cobo_2026_CVPR,
  author    = {Garcia-Cobo, Guillermo and Igl, Maximilian and Karkus, Peter and Zhang, Zhejun and Watson, Michael and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco},
  title     = {{RoaD}: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1000--1009}
}
Self-Evolving 3D Scene Generation from a Single Image: Kaizhi Zheng,

Yue Fan,

Jing Gu,

Zishuo Xu,

Xuehai He,

Xin Eric Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Kaizhi and Fan, Yue and Gu, Jing and Xu, Zishuo and He, Xuehai and Wang, Xin Eric},
  title     = {Self-Evolving {3D} Scene Generation from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {579--590}
}
SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting: Jikai Wang,

Xingtai Gui,

Jiahao Gong,

Feiyang Tan,

Wencheng Han,

Cheng-Zhong Xu,

Jianbing Shen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jikai and Gui, Xingtai and Gong, Jiahao and Tan, Feiyang and Han, Wencheng and Xu, Cheng-Zhong and Shen, Jianbing},
  title     = {{SurfelOcc}: Self-supervised Occupancy Prediction via {2D} Surfel Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1039--1049}
}
Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness: Debarshi Brahma,

Soma Biswas; [pdf] [supp]
[bibtex]
@InProceedings{Brahma_2026_CVPR,
  author    = {Brahma, Debarshi and Biswas, Soma},
  title     = {Defending {CLIP} via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {656--665}
}
RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection: Ozsel Kilinc,

Cem Tarhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kilinc_2026_CVPR,
  author    = {Kilinc, Ozsel and Tarhan, Cem},
  title     = {{RQR3D}: Reparametrizing the regression targets for {BEV}-based {3D} object detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1159--1169}
}
MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing: Changho Choi,

Minho Kim,

Jinkyu Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Changho and Kim, Minho and Kim, Jinkyu},
  title     = {{MambaEye}: A Size-Agnostic Visual Encoder with Causal Sequential Processing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2659--2668}
}
RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction: Yangfan Zhao,

Hanwei Zhang,

Ke Huang,

Qiufeng Wang,

Zhenzhou Shao,

Dengyu Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yangfan and Zhang, Hanwei and Huang, Ke and Wang, Qiufeng and Shao, Zhenzhou and Wu, Dengyu},
  title     = {{RU4D-SLAM}: Reweighting Uncertainty in {Gaussian} Splatting {SLAM} for {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1492--1502}
}
PolyReal: A Benchmark for Real-World Polymer Science Workflows: Wanhao Liu,

Weida Wang,

Jiaqing Xie,

Suorong Yang,

Jue Wang,

Benteng Chen,

Guangtao Mei,

Zonglin Yang,

Shufei Zhang,

Yuchun Mo,

Lang Cheng,

Jin Zeng,

Houqiang Li,

Wanli Ouyang,

Yuqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Wanhao and Wang, Weida and Xie, Jiaqing and Yang, Suorong and Wang, Jue and Chen, Benteng and Mei, Guangtao and Yang, Zonglin and Zhang, Shufei and Mo, Yuchun and Cheng, Lang and Zeng, Jin and Li, Houqiang and Ouyang, Wanli and Li, Yuqiang},
  title     = {{PolyReal}: A Benchmark for Real-World Polymer Science Workflows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1954--1964}
}
Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization: Linsi Wu,

Gang Shen,

Xuefei Lv,

Chenglong Wu,

Yuru Pei; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Linsi and Shen, Gang and Lv, Xuefei and Wu, Chenglong and Pei, Yuru},
  title     = {Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2669--2679}
}
On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks: Ziad Tariq Muhammad Ali,

Raja Muhammad Atif Azad,

Muhammad Ajmal Azad,

Iain Rice,

Umar Daraz,

Ali Shariq Imran,

James Holyhead; [pdf] [supp]
[bibtex]
@InProceedings{Ali_2026_CVPR,
  author    = {Ali, Ziad Tariq Muhammad and Azad, Raja Muhammad Atif and Azad, Muhammad Ajmal and Rice, Iain and Daraz, Umar and Imran, Ali Shariq and Holyhead, James},
  title     = {On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {809--818}
}
HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints: Shurui Liu,

Weide Chen,

Ancong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Shurui and Chen, Weide and Wu, Ancong},
    title     = {{HiDiGen}: Hierarchical Diffusion for {B-Rep} Generation with Explicit Topological Constraints},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {538--546}
}
WildAni4D: Towards 4D Animal Mesh Reconstruction: Gyeongsu Cho,

Hezhen Hu,

Donghyeon Soon,

Changwoo Kang,

Kyungdon Joo; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
    author    = {Cho, Gyeongsu and Hu, Hezhen and Soon, Donghyeon and Kang, Changwoo and Joo, Kyungdon},
    title     = {{WildAni4D}: Towards {4D} Animal Mesh Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {160--169}
}
PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET: Yixin Chen,

Yan Wang,

Wenrui Shao,

Zhaoheng Xie; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yixin and Wang, Yan and Shao, Wenrui and Xie, Zhaoheng},
    title     = {{PHATE-Net}: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in {PET}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2534--2543}
}
LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting: Yicheng Rui,

Xiao-Wei Duan,

Licai Deng,

Fan Yang,

Zhengming Dang,

Zhengjun Du,

Junhao Peng,

Wenhao Chu,

Umut Mahmut,

Kexin Li,

Yiyun Wu,

Fabo Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rui_2026_CVPR,
    author    = {Rui, Yicheng and Duan, Xiao-Wei and Deng, Licai and Yang, Fan and Dang, Zhengming and Du, Zhengjun and Peng, Junhao and Chu, Wenhao and Mahmut, Umut and Li, Kexin and Wu, Yiyun and Feng, Fabo},
    title     = {{LenghuSky-8}: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and {Alt-Az} Calibration for Segmentation and Nowcasting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1774--1785}
}
Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation: Junyu Chen,

Md Yousuf Harun,

Christopher Kanan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Junyu and Harun, Md Yousuf and Kanan, Christopher},
    title     = {Unlocking {ImageNet}'s Multi-Object Nature: Automated Large-Scale Multilabel Annotation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2284--2293}
}
RefDrone: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes: Zhichao Sun,

Yepeng Liu,

Zhiling Su,

Huachao Zhu,

Yuliang Gu,

Yuda Zou,

Zelong Liu,

Gui-Song Xia,

Bo Du,

Yongchao Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Zhichao and Liu, Yepeng and Su, Zhiling and Zhu, Huachao and Gu, Yuliang and Zou, Yuda and Liu, Zelong and Xia, Gui-Song and Du, Bo and Xu, Yongchao},
    title     = {{RefDrone}: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1752--1762}
}
ProDiG: Progressive Diffusion-Guided Gaussian Splatting for Aerial to Ground Reconstruction: Sirshapan Mitra,

Yogesh S Rawat; [pdf] [arXiv]
[bibtex]
@InProceedings{Mitra_2026_CVPR,
    author    = {Mitra, Sirshapan and Rawat, Yogesh S},
    title     = {{ProDiG}: Progressive Diffusion-Guided {Gaussian} Splatting for Aerial to Ground Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {22--32}
}
Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models: Feng Chen,

Chenhui Gou,

Yefei He,

Yang Yang,

Bohan Zhuang,

Qi Wu; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Feng and Gou, Chenhui and He, Yefei and Yang, Yang and Zhuang, Bohan and Wu, Qi},
    title     = {Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3050--3059}
}
Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks: Zhiqiu Xia,

Furong Mu,

Qi Li,

Shanshan Zhang,

Jie Gui,

Chunpeng Wang,

Yunan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Zhiqiu and Mu, Furong and Li, Qi and Zhang, Shanshan and Gui, Jie and Wang, Chunpeng and Liu, Yunan},
    title     = {Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1650--1659}
}
A Simple Framework for Visual Navigation: Faith Johnson,

Bryan Bo Cao,

Shubham Jain,

Ashwin Ashok,

Kristin Dana; [pdf] [arXiv]
[bibtex]
@InProceedings{Johnson_2026_CVPR,
    author    = {Johnson, Faith and Cao, Bryan Bo and Jain, Shubham and Ashok, Ashwin and Dana, Kristin},
    title     = {A Simple Framework for Visual Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3167--3177}
}
Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models: Yiwei You,

Zan Chen,

Bo Wang,

Xiaofei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{You_2026_CVPR,
    author    = {You, Yiwei and Chen, Zan and Wang, Bo and Zhou, Xiaofei},
    title     = {Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2324--2334}
}
UniVerse3D: Emerging Properties of Unified Multimodal Models in 3D Understanding and Generation: Junliang Ye,

Zehuan Huang,

Yansong Qu,

Chunshi Wang,

Yunhan Yang,

Yang Li,

Yawei Luo,

Zhuo Chen,

Sheng Lu,

Jun Zhu,

Chunchao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Junliang and Huang, Zehuan and Qu, Yansong and Wang, Chunshi and Yang, Yunhan and Li, Yang and Luo, Yawei and Chen, Zhuo and Lu, Sheng and Zhu, Jun and Guo, Chunchao},
    title     = {{UniVerse3D}: Emerging Properties of Unified Multimodal Models in {3D} Understanding and Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {613--623}
}
Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday QR Codes: Ashish Kumar,

Aarthi S,

Akshay Agarwal; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
    author    = {Kumar, Ashish and S, Aarthi and Agarwal, Akshay},
    title     = {Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday {QR} Codes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {857--866}
}
Beyond 3D Geometry: M3FD, a Large-Scale Dataset and Benchmark for Multimodal 3D Perceptual Understanding: Huan Hu,

Ping Chen,

Zezhou Chen,

Zhaoxiang Liu,

Zipeng Wang,

Xiang Liu,

Xin Wang,

Kai Wang,

Shiguo Lian; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Huan and Chen, Ping and Chen, Zezhou and Liu, Zhaoxiang and Wang, Zipeng and Liu, Xiang and Wang, Xin and Wang, Kai and Lian, Shiguo},
    title     = {Beyond {3D} Geometry: {M3FD}, a Large-Scale Dataset and Benchmark for Multimodal {3D} Perceptual Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1986--1995}
}
FedCVC: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift: Jinshan Lai,

Tingxuan Huang,

Baoyang Jiang,

Liuyu Xiang,

Qiang Ma,

Jianwei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR,
    author    = {Lai, Jinshan and Huang, Tingxuan and Jiang, Baoyang and Xiang, Liuyu and Ma, Qiang and Hu, Jianwei},
    title     = {{FedCVC}: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2514--2523}
}
Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras: Hodaka Kawachi,

Tomoya Nakamura,

Hiroaki Santo,

SaiKiran Kumar Tedla,

Trevor D Canham,

Yasushi Yagi,

Michael S. Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawachi_2026_CVPR,
    author    = {Kawachi, Hodaka and Nakamura, Tomoya and Santo, Hiroaki and Tedla, SaiKiran Kumar and Canham, Trevor D and Yagi, Yasushi and Brown, Michael S.},
    title     = {Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1273--1282}
}
AvatarMix: Identity-Preserving Cross-Avatar Composition for Outfit Personalization: Zhaorong Wang,

Yoshihiro Kanamori,

Yuki Endo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zhaorong and Kanamori, Yoshihiro and Endo, Yuki},
    title     = {{AvatarMix}: Identity-Preserving Cross-Avatar Composition for Outfit Personalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {425--435}
}
DeepFakeShield: A Proactive Defense Against Malicious Face Swapping: Saeed Karimi-Bidhendi,

Joseph DeGol,

Eric Wengrowski,

Dominic Roberts,

Kristin Dana; [pdf] [supp]
[bibtex]
@InProceedings{Karimi-Bidhendi_2026_CVPR,
    author    = {Karimi-Bidhendi, Saeed and DeGol, Joseph and Wengrowski, Eric and Roberts, Dominic and Dana, Kristin},
    title     = {{DeepFakeShield}: A Proactive Defense Against Malicious Face Swapping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {867--877}
}
FREE-Switch: Frequency-Based Dynamic LoRA Switch for Style Transfer: Shenghe Zheng,

Minyu Zhang,

Tianhao Liu,

Hongzhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Shenghe and Zhang, Minyu and Liu, Tianhao and Wang, Hongzhi},
    title     = {{FREE-Switch}: Frequency-Based Dynamic {LoRA} Switch for Style Transfer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2793--2802}
}
VEBench: Benchmarking Large Multimodal Models for Real-world Video Editing: Andong Deng,

Dawei Du,

Zhenfang Chen,

Wen Zhong,

Fan Chen,

Guang Chen,

Chia-Wen Kuo,

Longyin Wen,

Chen Chen,

Sijie Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
    author    = {Deng, Andong and Du, Dawei and Chen, Zhenfang and Zhong, Wen and Chen, Fan and Chen, Guang and Kuo, Chia-Wen and Wen, Longyin and Chen, Chen and Zhu, Sijie},
    title     = {{VEBench}: Benchmarking Large Multimodal Models for Real-world Video Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2187--2196}
}
Qinling-GFFE: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting: Zhenhe Liang,

Congqi Cao,

Lanshu Hu,

Liujie Pan; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Zhenhe and Cao, Congqi and Hu, Lanshu and Pan, Liujie},
    title     = {{Qinling-GFFE}: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2554--2563}
}
ShelfGaussian: Shelf-Supervised Open-Vocabulary Gaussian-Based 3D Scene Understanding: Lingjun Zhao,

Yandong Luo,

James Hays,

Lu Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Lingjun and Luo, Yandong and Hays, James and Gan, Lu},
    title     = {{ShelfGaussian}: Shelf-Supervised Open-Vocabulary {Gaussian}-Based {3D} Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1398--1409}
}
PEPR: Privileged Event-based Predictive Regularization for Domain Generalization: Gabriele Magrini,

Federico Becattini,

Niccolò Biondi,

Pietro Pala; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magrini_2026_CVPR,
    author    = {Magrini, Gabriele and Becattini, Federico and Biondi, Niccol{\`o} and Pala, Pietro},
    title     = {{PEPR}: Privileged Event-based Predictive Regularization for Domain Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3209--3219}
}
DRA: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting: Minwei Wen,

Yang Wei,

Junhao Xiao,

Xiuli Bi,

Bin Xiao; [pdf]
[bibtex]
@InProceedings{Wen_2026_CVPR,
    author    = {Wen, Minwei and Wei, Yang and Xiao, Junhao and Bi, Xiuli and Xiao, Bin},
    title     = {{DRA}: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {779--788}
}
CLLAP: Contrastive Learning-based LiDAR-Augmented Pretraining for Enhanced Radar-Camera Fusion: Bingyi Liu,

Chuanhui Zhu,

Hongfei Xue,

Jian Teng,

Jipeng Liu,

Enshu Wang,

Penglin Dai,

Pu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Bingyi and Zhu, Chuanhui and Xue, Hongfei and Teng, Jian and Liu, Jipeng and Wang, Enshu and Dai, Penglin and Wang, Pu},
    title     = {{CLLAP}: Contrastive Learning-based {LiDAR}-Augmented Pretraining for Enhanced Radar-Camera Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {99--108}
}
Devil is in Narrow Policy: Unleashing Exploration in Driving VLA Models: Canyu Chen,

Yuguang Yang,

Zhewen Tan,

Yizhi Wang,

Ruiyi Zhan,

Haiyan Liu,

Xuanyao Mao,

Jason Bao,

Xinyue Tang,

Linlin Yang,

Bingchuan Sun,

Yan Wang,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Canyu and Yang, Yuguang and Tan, Zhewen and Wang, Yizhi and Zhan, Ruiyi and Liu, Haiyan and Mao, Xuanyao and Bao, Jason and Tang, Xinyue and Yang, Linlin and Sun, Bingchuan and Wang, Yan and Zhang, Baochang},
    title     = {Devil is in Narrow Policy: Unleashing Exploration in Driving {VLA} Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1062--1072}
}
APC: Transferable and Efficient Adversarial Point Counterattack for Robust 3D Point Cloud Recognition: Geunyoung Jung,

Soohong Kim,

Inseok Kong,

Jiyoung Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
    author    = {Jung, Geunyoung and Kim, Soohong and Kong, Inseok and Jung, Jiyoung},
    title     = {{APC}: Transferable and Efficient Adversarial Point Counterattack for Robust {3D} Point Cloud Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {789--798}
}
Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation: Cong Li,

Gong Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Cong and Cheng, Gong},
    title     = {Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3040--3049}
}
Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks: Yachan Guo,

Jose Lu Gómez,

Danna Xue,

Yi Xiao,

Antonio M. López; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Yachan and Lu G{\'o}mez, Jose and Xue, Danna and Xiao, Yi and L{\'o}pez, Antonio M.},
    title     = {Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3231--3240}
}
AR4D: Autoregressive 4D Generation from Monocular Videos: Hanxin Zhu,

Tianyu He,

Zhibo Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Hanxin and He, Tianyu and Chen, Zhibo},
    title     = {{AR4D}: Autoregressive {4D} Generation from Monocular Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {88--98}
}
HelixTrack: Event-Based Tracking and RPM Estimation of Propeller-like Objects: Radim Spetlik,

Michal Pliska,

Vojtěch Vrba,

Jiří Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spetlik_2026_CVPR,
    author    = {Spetlik, Radim and Pliska, Michal and Vrba, Vojt{\v{e}}ch and Matas, Ji{\v{r}}{\'\i}},
    title     = {{HelixTrack}: Event-Based Tracking and {RPM} Estimation of Propeller-like Objects},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3200--3208}
}
OmniDrive-R1: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving: Zhenguo Zhang,

Haohan Zheng,

Yishen Wang,

Le Xu,

Tianchen Deng,

Xuefeng Chen,

Qu Chen,

Bo Zhang,

Wuxiong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Zhenguo and Zheng, Haohan and Wang, Yishen and Xu, Le and Deng, Tianchen and Chen, Xuefeng and Chen, Qu and Zhang, Bo and Huang, Wuxiong},
    title     = {{OmniDrive-R1}: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1106--1116}
}
OminiMAG-SLAM : Unified Online Dual Graph Optimization for Multi-Agent Gaussian SLAM: Leqian Ding,

Caibo Li,

Yu Guo,

Fei Wang; [pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR,
    author    = {Ding, Leqian and Li, Caibo and Guo, Yu and Wang, Fei},
    title     = {{OminiMAG-SLAM} : Unified Online Dual Graph Optimization for Multi-Agent {Gaussian} {SLAM}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1431--1440}
}
Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction: Ziyao Guo,

Kaipeng Zhang,

Michael Qizhe Shieh; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Ziyao and Zhang, Kaipeng and Shieh, Michael Qizhe},
    title     = {Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1230--1239}
}
PEARL: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception: Armin Maleki,

Hayder Radha; [pdf] [supp]
[bibtex]
@InProceedings{Maleki_2026_CVPR,
    author    = {Maleki, Armin and Radha, Hayder},
    title     = {{PEARL}: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1138--1147}
}
UNIFORM: Unifying Knowledge from Large-scale and Diverse Pre-trained Models: Yimu Wang,

Weiming Zhuang,

Chen Chen,

Jiabo Huang,

Jingtao Li,

Lingjuan Lyu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yimu and Zhuang, Weiming and Chen, Chen and Huang, Jiabo and Li, Jingtao and Lyu, Lingjuan},
    title     = {{UNIFORM}: Unifying Knowledge from Large-scale and Diverse Pre-trained Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2904--2914}
}
Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light: Ali Hassani,

Fengzhe Zhou,

Aditya Kane,

Jiannan Huang,

Chieh-Yun Chen,

Min Shi,

Steven Walton,

Markus Hoehnerbach,

Vijay Thakkar,

Mikhail Isaev,

Qinsheng Zhang,

Bing Xu,

Haicheng Wu,

Wen-mei Hwu,

Ming-Yu Liu,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassani_2026_CVPR,
    author    = {Hassani, Ali and Zhou, Fengzhe and Kane, Aditya and Huang, Jiannan and Chen, Chieh-Yun and Shi, Min and Walton, Steven and Hoehnerbach, Markus and Thakkar, Vijay and Isaev, Mikhail and Zhang, Qinsheng and Xu, Bing and Wu, Haicheng and Hwu, Wen-mei and Liu, Ming-Yu and Shi, Humphrey},
    title     = {Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3009--3018}
}
SciGA: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers: Takuro Kawada,

Shunsuke Kitada,

Sota Nemoto,

Hitoshi Iyatomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawada_2026_CVPR,
    author    = {Kawada, Takuro and Kitada, Shunsuke and Nemoto, Sota and Iyatomi, Hitoshi},
    title     = {{SciGA}: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2250--2260}
}
InEdit-Bench: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models: Zhiqiang Sheng,

Xumeng Han,

Zhiwei Zhang,

Zenghui Xiong,

Yifan Ding,

Aoxiang Ping,

Xiang Li,

Tong Guo,

Yao Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
    author    = {Sheng, Zhiqiang and Han, Xumeng and Zhang, Zhiwei and Xiong, Zenghui and Ding, Yifan and Ping, Aoxiang and Li, Xiang and Guo, Tong and Mao, Yao},
    title     = {{InEdit-Bench}: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2176--2186}
}
RiGS: Rigid-aware 4D Gaussian Splatting from a Single Monocular Video: Chenyu Wu,

Wanhua Li,

Chen Zhu-Tian,

Hanspeter Pfister; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Chenyu and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter},
    title     = {{RiGS}: Rigid-aware {4D} {Gaussian} Splatting from a Single Monocular Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {547--557}
}
JetViT: Efficient High-Resolution Vision Transformer with Post-Training Attention Search: Dongyun Zou,

Zhuoyang Zhang,

Junyu Chen,

Wenkun He,

Qinhe Peng,

Hanrong Ye,

Yao Lu,

Hongxu Yin,

Yu Wang,

Song Han,

Han Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
    author    = {Zou, Dongyun and Zhang, Zhuoyang and Chen, Junyu and He, Wenkun and Peng, Qinhe and Ye, Hanrong and Lu, Yao and Yin, Hongxu and Wang, Yu and Han, Song and Cai, Han},
    title     = {{JetViT}: Efficient High-Resolution Vision Transformer with Post-Training Attention Search},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2957--2967}
}
Three-Step Conditional Diffusion 3D Reconstruction for Light-Field Microscopy: Qihong Zhao,

Shaokang Yan,

Zhimin Qiao,

Jinjia Wang,

Bo Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Qihong and Yan, Shaokang and Qiao, Zhimin and Wang, Jinjia and Xiong, Bo},
    title     = {Three-Step Conditional Diffusion {3D} Reconstruction for Light-Field Microscopy},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {478--487}
}
Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation: Chenhao Shi,

Yichen Zhu,

Junjie Wen,

Yefei Chen,

Ziang Liu,

Faming Fang; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Chenhao and Zhu, Yichen and Wen, Junjie and Chen, Yefei and Liu, Ziang and Fang, Faming},
    title     = {Teleoperation, Simulation, or Human Video? {Data} Utilization Law for Robot Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1388--1397}
}
FedVG: Gradient-Guided Aggregation for Enhanced Federated Learning: Alina Devkota,

Jacob Thrasher,

Donald Adjeroh,

Binod Bhattarai,

Prashnna k. Gyawali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Devkota_2026_CVPR,
    author    = {Devkota, Alina and Thrasher, Jacob and Adjeroh, Donald and Bhattarai, Binod and Gyawali, Prashnna k.},
    title     = {{FedVG}: Gradient-Guided Aggregation for Enhanced Federated Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2803--2812}
}
iTCTSL: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning: Pan Mu,

Yuchao Zhu,

Shiqi Zhang,

Hanting Yan,

Jinglin Zhang,

Cong Bai; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2026_CVPR,
    author    = {Mu, Pan and Zhu, Yuchao and Zhang, Shiqi and Yan, Hanting and Zhang, Jinglin and Bai, Cong},
    title     = {{iTCTSL}: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1587--1596}
}
Native3D: End-to-End 3D Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment: Yibo Liu,

Ziwei Zhang,

Haozhou Pang,

Menghao Li,

Lanshan He,

Gan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yibo and Zhang, Ziwei and Pang, Haozhou and Li, Menghao and He, Lanshan and Qi, Gan},
    title     = {{Native3D}: End-to-End {3D} Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {381--390}
}
Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition: Zhuohao Chen,

Zeng Li,

Yifei Zhang,

Chang Liu,

Yu Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Zhuohao and Li, Zeng and Zhang, Yifei and Liu, Chang and Zhou, Yu},
    title     = {Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1577--1586}
}
Retrieval-VLA: Training-Free In-Context Adaptation for Vision-Language-Action Models: Yue Zhang,

Rui Wang,

Jiehong Lin,

Zhongrui Wang,

Xiaojuan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yue and Wang, Rui and Lin, Jiehong and Wang, Zhongrui and Qi, Xiaojuan},
    title     = {{Retrieval-VLA}: Training-Free In-Context Adaptation for Vision-Language-Action Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1358--1367}
}
SwiftVGGT: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes: Jungho Lee,

Minhyeok Lee,

Sunghun Yang,

Minseok Kang,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Jungho and Lee, Minhyeok and Yang, Sunghun and Kang, Minseok and Lee, Sangyoun},
    title     = {{SwiftVGGT}: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {447--456}
}
Affine Bases for Affine Spaces: Gabriel Dogadov,

Marc Alexa; [pdf] [supp]
[bibtex]
@InProceedings{Dogadov_2026_CVPR,
    author    = {Dogadov, Gabriel and Alexa, Marc},
    title     = {Affine Bases for Affine Spaces},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {213--222}
}
A2Z-10M+: Geometric Deep Learning with A-to-Z BRep Annotations for AI-Assisted CAD Modeling and Reverse Engineering: Pritham K Jena,

Bhavika Baburaj,

Tushar Anand,

Vedant Dutta,

Vineeth Ulavala,

Sk Aziz Ali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jena_2026_CVPR,
    author    = {Jena, Pritham K and Baburaj, Bhavika and Anand, Tushar and Dutta, Vedant and Ulavala, Vineeth and Ali, Sk Aziz},
    title     = {{A2Z-10M+}: Geometric Deep Learning with {A-to-Z} {BRep} Annotations for {AI}-Assisted {CAD} Modeling and Reverse Engineering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1913--1923}
}
CoRT-Predictor: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving: Yanlin Jiang,

Yuchen Liu,

Mingren Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Yanlin and Liu, Yuchen and Liu, Mingren},
    title     = {{CoRT-Predictor}: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1117--1127}
}
The DeepSpeak Dataset: Sarah Barrington,

Maty Bohacek,

Hany Farid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barrington_2026_CVPR,
    author    = {Barrington, Sarah and Bohacek, Maty and Farid, Hany},
    title     = {The {DeepSpeak} Dataset},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1893--1902}
}
ELSA: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers: Chih-Chung Hsu,

Xin-Di Ma,

Wo-Ting Liao,

Chia-Ming Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsu_2026_CVPR,
    author    = {Hsu, Chih-Chung and Ma, Xin-Di and Liao, Wo-Ting and Lee, Chia-Ming},
    title     = {{ELSA}: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2988--2997}
}
When Interpretability Becomes a Liability: Adversarial Attacks on CBM Concept Layers: Aditya Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sridhar_2026_CVPR,
  author    = {Sridhar, Aditya},
  title     = {When Interpretability Becomes a Liability: Adversarial Attacks on {CBM} Concept Layers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {829--836},
}
Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors: Vishal Purohit,

Wei Chen,

Qiang Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Purohit_2026_CVPR,
  author    = {Purohit, Vishal and Chen, Wei and Qiu, Qiang},
  title     = {Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1263--1272},
}
Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving: Xuepei Yang,

Mingtao Feng,

Weisheng Dong,

Lin Chen,

Jie Feng,

Fangfang Wu,

Yufan Zhu,

Ajmal Saeed Mian; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xuepei and Feng, Mingtao and Dong, Weisheng and Chen, Lin and Feng, Jie and Wu, Fangfang and Zhu, Yufan and Mian, Ajmal Saeed},
  title     = {Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {942--951},
}
Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding: Jeongwan Shin,

Jaehyeon Kim,

Donguk Ko,

Jaeho Choi; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Jeongwan and Kim, Jaehyeon and Ko, Donguk and Choi, Jaeho},
  title     = {Can Language Models Understand {mmWave} Data? {Benchmarking} Large Language Models for {mmWave} Radar-Based Human Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2208--2219},
}
FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views: Chaoyi Zhou,

Run Wang,

Feng Luo,

Mert D. Pesé,

Zhiwen Fan,

Yiqi Zhong,

Siyu Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Chaoyi and Wang, Run and Luo, Feng and Pes{\'e}, Mert D. and Fan, Zhiwen and Zhong, Yiqi and Huang, Siyu},
  title     = {{FF3R}: Feedforward Feature {3D} Reconstruction from Unconstrained views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {129--138},
}
HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes: Mauricio Soroco,

Francesco Pittaluga,

Zaid Tasneem,

Abhishek Aich,

Bingbing Zhuang,

Wuyang Chen,

Manmohan Chandraker,

Ziyu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soroco_2026_CVPR,
  author    = {Soroco, Mauricio and Pittaluga, Francesco and Tasneem, Zaid and Aich, Abhishek and Zhuang, Bingbing and Chen, Wuyang and Chandraker, Manmohan and Jiang, Ziyu},
  title     = {{HorizonWeaver}: Generalizable Multi-Level Semantic Editing for Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {952--959},
}
CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation: Pingrui Lai,

Yanshan Zhou,

Zihao Xie,

Hua Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Pingrui and Zhou, Yanshan and Xie, Zihao and Yang, Hua},
  title     = {{CrowdVerse}: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2197--2207},
}
Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game: Jeonghwan Kim,

Wontaek Kim,

Yidan Lu,

Jin Cheng,

Fatemeh Zargarbashi,

Zicheng Zeng,

Zekun Qi,

Zhiyang Dou,

Nitish Sontakke,

Donghoon Baek,

Li Yi,

Sehoon Ha,

Tianyu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jeonghwan and Kim, Wontaek and Lu, Yidan and Cheng, Jin and Zargarbashi, Fatemeh and Zeng, Zicheng and Qi, Zekun and Dou, Zhiyang and Sontakke, Nitish and Baek, Donghoon and Yi, Li and Ha, Sehoon and Li, Tianyu},
  title     = {{Switch-JustDance}: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1421--1430},
}
When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI: Yanhui Li,

Qi Zhou,

Zhihong Xu,

Huizhong Guo,

Wenhai Wang,

Dongxia Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanhui and Zhou, Qi and Xu, Zhihong and Guo, Huizhong and Wang, Wenhai and Wang, Dongxia},
  title     = {When Harmful Content Goes Invisible: Unveiling Perception Failure of {LVLMs} with {CAMOUHARMTI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2038--2048},
}
See Tomorrow, Act Today: Foresight-Driven Autonomous Driving: Bozhou Zhang,

Nan Song,

Yuang Wang,

Jiankang Deng,

Xiatian Zhu,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bozhou and Song, Nan and Wang, Yuang and Deng, Jiankang and Zhu, Xiatian and Zhang, Li},
  title     = {See Tomorrow, Act Today: Foresight-Driven Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1180--1190},
}
GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking: Zekun Qian,

Ruize Han,

Zhixiang Wang,

Liang Wan,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Wan, Liang and Feng, Wei},
  title     = {{GOVTrack}: Towards Generative Open-Vocabulary Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1872--1882},
}
Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI: Mingjie Li,

Edward Kim,

Yue Zhao,

Ehsan Adeli,

Kilian M. Pohl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingjie and Kim, Edward and Zhao, Yue and Adeli, Ehsan and Pohl, Kilian M.},
  title     = {Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain {MRI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1211--1220},
}
RoadTones: Tone Controllable Text Generation from Road Event Videos: Chirag Parikh,

Siddhi Pravin Lipare,

Ravi Kiran Sarvadevabhatla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parikh_2026_CVPR,
  author    = {Parikh, Chirag and Lipare, Siddhi Pravin and Sarvadevabhatla, Ravi Kiran},
  title     = {{RoadTones}: Tone Controllable Text Generation from Road Event Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1019--1028},
}
SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts: Shun Inadumi,

Shohei Tanaka,

Tosho Hirasawa,

Atsushi Hashimoto,

Koichiro Yoshino,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Inadumi_2026_CVPR,
  author    = {Inadumi, Shun and Tanaka, Shohei and Hirasawa, Tosho and Hashimoto, Atsushi and Yoshino, Koichiro and Ushiku, Yoshitaka},
  title     = {{SciPostGen}: Bridging the Gap between Scientific Papers and Poster Layouts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2131--2141},
}
Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction: Chen Ziwen,

Hao Tan,

Peng Wang,

Zexiang Xu,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Tan, Hao and Wang, Peng and Xu, Zexiang and Fuxin, Li},
  title     = {{Long-LRM++}: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {370--380},
}
Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding: Wanying Zhou,

Zhuo Chen,

Jianzhi Lu,

Chenxi Ma,

Weimin Tan,

Bo Yan; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Wanying and Chen, Zhuo and Lu, Jianzhi and Ma, Chenxi and Tan, Weimin and Yan, Bo},
  title     = {{Hi3Doc}: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2721--2730},
}
HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion: Aihua Mao,

Jun Yang,

Yong-Jin Liu,

Ying He; [pdf]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Aihua and Yang, Jun and Liu, Yong-Jin and He, Ying},
  title     = {{HEDA}: Hyperbolic-{Euclidean} Dual Adaptation for Robust Real-World Point Cloud Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {149--159},
}
DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework: Yani Zhang,

Dongming Wu,

Hao Shi,

Yingfei Liu,

Tiancai Wang,

Xingping Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yani and Wu, Dongming and Shi, Hao and Liu, Yingfei and Wang, Tiancai and Dong, Xingping},
  title     = {{DEGround}: An Effective Baseline for Ego-centric {3D} Visual Grounding With a Homogeneous Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3103--3113},
}
Shape and Texture Recognition in Large Vision-Language Models: Sagi Eppel,

Mor Bismut,

Alona Strugatski; [pdf] [supp]
[bibtex]
@InProceedings{Eppel_2026_CVPR,
  author    = {Eppel, Sagi and Bismut, Mor and Strugatski, Alona},
  title     = {Shape and Texture Recognition in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1839--1849},
}
TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution: Honghui Xu,

Chuangjie Fang,

Yiqun Meng,

Jiawei Jiang,

Sixian Chan,

Shiqing Zhang,

Jianwei Zheng; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Honghui and Fang, Chuangjie and Meng, Yiqun and Jiang, Jiawei and Chan, Sixian and Zhang, Shiqing and Zheng, Jianwei},
  title     = {{TPTransformer}: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1670--1679},
}
An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning: Pengzhou Chen,

Qiling Tang,

XinYu Chai,

Rong Liu,

Zhi Li,

Liman Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengzhou and Tang, Qiling and Chai, XinYu and Liu, Rong and Li, Zhi and Liu, Liman},
  title     = {An Interpretable {Alzheimer's} Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3241--3251},
}
M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?: Haolong Yan,

Kaijun Tan,

Yeqing Shen,

Xin Huang,

Jia Wang,

Zheng Ge,

Xiangyu Zhang,

Si Li,

Daxin Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haolong and Tan, Kaijun and Shen, Yeqing and Huang, Xin and Wang, Jia and Ge, Zheng and Zhang, Xiangyu and Li, Si and Jiang, Daxin},
  title     = {{M-DocSum}: Do {LVLMs} Genuinely Comprehend Interleaved Image-Text in Document Summarization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2731--2741},
}
Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection: Wenbing Zhu,

Jianing Liang,

Linjie Cheng,

Yurui Pan,

Zhuhao Chen,

Qingwang Yan,

Yudong Cheng,

Jianghui Zhang,

Mingmin Chi,

Bo Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Wenbing and Liang, Jianing and Cheng, Linjie and Pan, Yurui and Chen, Zhuhao and Yan, Qingwang and Cheng, Yudong and Zhang, Jianghui and Chi, Mingmin and Peng, Bo},
  title     = {{Real-IAD MVN}: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2060--2068},
}
AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation: Yulu Wu,

Jiujun Cheng,

Haowen Wang,

Dengyang Suo,

Pei Ren,

Qichao Mao,

Shangce Gao,

Yakun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yulu and Cheng, Jiujun and Wang, Haowen and Suo, Dengyang and Ren, Pei and Mao, Qichao and Gao, Shangce and Huang, Yakun},
  title     = {{AOMGen}: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3082--3091},
}
PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation: Xiangyu Li,

Chen Wang,

Yumao Liu,

Dengbo He,

Jiahao Zhang,

Ke Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiangyu and Wang, Chen and Liu, Yumao and He, Dengbo and Zhang, Jiahao and Ma, Ke},
  title     = {{PAVE}: An End-to-End Dataset for Production Autonomous Vehicle Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1010--1018},
}
OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation: Zhishan Zhou,

Siyuan Wei,

Zengran Wang,

Chunjie Wang,

Xiaosheng Yan,

Xiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zhishan and Wei, Siyuan and Wang, Zengran and Wang, Chunjie and Yan, Xiaosheng and Liu, Xiao},
  title     = {{OpenTrack3D}: Towards Accurate and Generalizable Open-Vocabulary {3D} Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {233--242},
}
RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning: Jingqi Xu,

Jingxi Lu,

Chenghao Li,

Sreetama Sarkar,

Souvik Kundu,

Peter A Beerel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jingqi and Lu, Jingxi and Li, Chenghao and Sarkar, Sreetama and Kundu, Souvik and Beerel, Peter A},
  title     = {{RedVTP}: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2783--2792},
}
Tiny Inference-Time Scaling with Latent Verifiers: Davide Bucciarelli,

Evelyn Turri,

Lorenzo Baraldi,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bucciarelli_2026_CVPR,
  author    = {Bucciarelli, Davide and Turri, Evelyn and Baraldi, Lorenzo and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Tiny Inference-Time Scaling with Latent Verifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2873--2882},
}
THEval. Evaluation Framework for Talking Head Video Generation: Nabyl Quignon,

Baptiste Chopin,

Yaohui Wang,

Antitza Dantcheva; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quignon_2026_CVPR,
  author    = {Quignon, Nabyl and Chopin, Baptiste and Wang, Yaohui and Dantcheva, Antitza},
  title     = {{THEval}. {Evaluation} Framework for Talking Head Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1943--1953},
}
Jailbreaking Frontier Foundation Models Through Intention Deception: Xinhe Wang,

Katia Sycara,

Yaqi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinhe and Sycara, Katia and Xie, Yaqi},
  title     = {Jailbreaking Frontier Foundation Models Through Intention Deception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {666--674},
}
Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes: Weihang Ran,

Qingtian Zhu,

Mingdeng Cao,

Wei Yuan,

Isao Echizen,

Yinqiang Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Ran_2026_CVPR,
  author    = {Ran, Weihang and Zhu, Qingtian and Cao, Mingdeng and Yuan, Wei and Echizen, Isao and Zheng, Yinqiang},
  title     = {Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For {3D} Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1597--1607},
}
Rethinking Compact (<1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions: Christos Kyrkou; [pdf] [supp]
[bibtex]
@InProceedings{Kyrkou_2026_CVPR,
  author    = {Kyrkou, Christos},
  title     = {Rethinking Compact (\ensuremath{<}{1M}) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2710--2720},
}
Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark: Rajmund Nagy,

Hendric Voss,

Thanh Hoang-Minh,

Mihail Tsakov,

Teodor Nikolov,

Zeyi Zhang,

Tenglong Ao,

Sicheng Yang,

Shaoli Huang,

Yongkang Cheng,

M. Hamza Mughal,

Rishabh Dabral,

Kiran Chhatre,

Christian Theobalt,

Libin Liu,

Stefan Kopp,

Rachel McDonnell,

Michael Neff,

Taras Kucherenko,

Youngwoo Yoon,

Gustav Eje Henter; [pdf] [supp]
[bibtex]
@InProceedings{Nagy_2026_CVPR,
  author    = {Nagy, Rajmund and Voss, Hendric and Hoang-Minh, Thanh and Tsakov, Mihail and Nikolov, Teodor and Zhang, Zeyi and Ao, Tenglong and Yang, Sicheng and Huang, Shaoli and Cheng, Yongkang and Mughal, M. Hamza and Dabral, Rishabh and Chhatre, Kiran and Theobalt, Christian and Liu, Libin and Kopp, Stefan and McDonnell, Rachel and Neff, Michael and Kucherenko, Taras and Yoon, Youngwoo and Henter, Gustav Eje},
  title     = {Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2152--2164},
}
Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction: Xuanming Jiang,

Baoyi An,

Dingyu Nie,

Haoyu Ren,

Zhengwei Zou,

Yizhe Yang,

Jialie Shen,

Zhiwen Jin,

Xueming Qian,

Zhongyu Yang,

Guoshuai Zhao; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xuanming and An, Baoyi and Nie, Dingyu and Ren, Haoyu and Zou, Zhengwei and Yang, Yizhe and Shen, Jialie and Jin, Zhiwen and Qian, Xueming and Yang, Zhongyu and Zhao, Guoshuai},
  title     = {Bridge Your Fields: {MeteoNet} for Efficient Non-Uniform Meteorological Field Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1608--1617},
}
SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses: Zhuohang Jiang,

Xu Yuan,

Haohao Qu,

Shanru Lin,

Kanglong Liu,

Wenqi Fan,

Li Qing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zhuohang and Yuan, Xu and Qu, Haohao and Lin, Shanru and Liu, Kanglong and Fan, Wenqi and Qing, Li},
  title     = {{SuperGlasses}: Benchmarking Vision Language Models as Intelligent Agents for {AI} Smart Glasses},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2165--2175},
}
MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling: Junming Zhang,

Yifei Ji,

Yongxuan Han,

Zhenzhe Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junming and Ji, Yifei and Han, Yongxuan and Zheng, Zhenzhe},
  title     = {{MipKV}: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2926--2936},
}
Catalyst: Out-of-Distribution Detection via Elastic Scaling: Abid Hassan,

Tuan Ngo,

Saad Shafiq,

Nenad Medvidovic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassan_2026_CVPR,
  author    = {Hassan, Abid and Ngo, Tuan and Shafiq, Saad and Medvidovic, Nenad},
  title     = {Catalyst: Out-of-Distribution Detection via Elastic Scaling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1618--1628},
}
Multimodal Large Language Models as Image Classifiers: Nikita Kisel,

Illia Volkov,

Klara Janouskova,

Jiri Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kisel_2026_CVPR,
  author    = {Kisel, Nikita and Volkov, Illia and Janouskova, Klara and Matas, Jiri},
  title     = {Multimodal Large Language Models as Image Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1711--1720},
}
PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks: Jie Guo,

JunXiang Wu,

Nan An,

Zhen Zhang,

Shuiying Xiang,

Mingjin Zhang,

Yunsong Li,

Yu'e Gao; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Jie and Wu, JunXiang and An, Nan and Zhang, Zhen and Xiang, Shuiying and Zhang, Mingjin and Li, Yunsong and Gao, Yu'e},
  title     = {{PSLIF}: A Primary-Supplementary {LIF} Neuron for Spiking Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2367--2376},
}
SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes: Fudong Ge,

Dingning Liu,

Hanshi Wang,

Yiwei Zhang,

Jin Gao,

Weiming Hu,

Zhipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Fudong and Liu, Dingning and Wang, Hanshi and Zhang, Yiwei and Gao, Jin and Hu, Weiming and Zhang, Zhipeng},
  title     = {{SurfaceGS}: Dynamic Surface {Gaussian} Splatting for Urban Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {900--909},
}
CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery: Tung-I Chen,

Lingdong Wang,

Subhransu Maji,

Ramesh K. Sitaraman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tung-I and Wang, Lingdong and Maji, Subhransu and Sitaraman, Ramesh K.},
  title     = {{CATRF}: Codec-Adaptive {TriPlane} Radiance Fields for Volumetric Content Delivery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {457--467},
}
Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data: Ji Woong Kim,

Ke Wang,

Zipeng Fu,

Sirui Chen,

Cong Zhao,

Jeff Lai,

Chelsea Finn; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Ji Woong and Wang, Ke and Fu, Zipeng and Chen, Sirui and Zhao, Cong and Lai, Jeff and Finn, Chelsea},
  title     = {{Ego-Pi}: {VLA} Fine-Tuning for Ego-Centric Human and Robot Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1515--1524},
}
SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters: Shohei Tanaka,

Atsushi Hashimoto,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanaka_2026_CVPR,
  author    = {Tanaka, Shohei and Hashimoto, Atsushi and Ushiku, Yoshitaka},
  title     = {{SciPostLayoutTree}: A Dataset for Structural Analysis of Scientific Posters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2753--2762},
}
Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs: Mona Gandhi,

K.J. Joseph,

Srinivasan Parthasarathy,

Sayan Nag; [pdf] [supp]
[bibtex]
@InProceedings{Gandhi_2026_CVPR,
  author    = {Gandhi, Mona and Joseph, K. J. and Parthasarathy, Srinivasan and Nag, Sayan},
  title     = {Through the {PRISM}: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1786--1796},
}
Learning a Particle Dynamics Model with Real-World Videos: Chanho Kim,

Suhas V. Sumukh,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Chanho and Sumukh, Suhas V. and Fuxin, Li},
  title     = {Learning a Particle Dynamics Model with Real-World Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {498--507},
}
Guided Lensless Polarization Imaging: Noa Kraicer,

Erez Yosef,

Raja Giryes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kraicer_2026_CVPR,
  author    = {Kraicer, Noa and Yosef, Erez and Giryes, Raja},
  title     = {Guided Lensless Polarization Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1252--1262},
}
DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks: Jacob Piland,

Christopher Sweet,

Adam Czajka; [pdf] [arXiv]
[bibtex]
@InProceedings{Piland_2026_CVPR,
  author    = {Piland, Jacob and Sweet, Christopher and Czajka, Adam},
  title     = {{DiffGradCAM}: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1201--1210},
}
dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning: Yingzi Ma,

Yulong Cao,

Wenhao Ding,

Shuibai Zhang,

Yan Wang,

Boris Ivanovic,

Ming Jiang,

Marco Pavone,

Chaowei Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yingzi and Cao, Yulong and Ding, Wenhao and Zhang, Shuibai and Wang, Yan and Ivanovic, Boris and Jiang, Ming and Pavone, Marco and Xiao, Chaowei},
  title     = {{dVLM-AD}: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1050--1061},
}
GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design: Xiaolei Zhou,

Chuangjie Fang,

Jie Wu,

Jingyi Yang,

Boyi Lin,

Jianwei Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xiaolei and Fang, Chuangjie and Wu, Jie and Yang, Jingyi and Lin, Boyi and Zheng, Jianwei},
  title     = {{GeoFusion-CAD}: Structure-Aware Diffusion with Geometric State Space for Parametric {3D} Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {243--252},
}
Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement: Pu Li,

Huafeng Li,

Yafei Zhang,

Yu Liu,

Wen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Pu and Li, Huafeng and Zhang, Yafei and Liu, Yu and Wang, Wen},
  title     = {Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1304--1313},
}
CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models: Fawaz Sammani,

Jonas Fischer,

Nikos Deligiannis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sammani_2026_CVPR,
  author    = {Sammani, Fawaz and Fischer, Jonas and Deligiannis, Nikos},
  title     = {{CLIP}-Free, Label Free, Unsupervised Concept Bottleneck Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3262--3272},
}
From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images: Fei Yu,

Yu Liu,

Luyang Tang,

Mingchao Sun,

Zengye Ge,

Rui Bu,

Yuchao Jin,

Haisen Zhao,

He Sun,

Yangyan Li,

Mu Xu,

Wenzheng Chen,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Fei and Liu, Yu and Tang, Luyang and Sun, Mingchao and Ge, Zengye and Bu, Rui and Jin, Yuchao and Zhao, Haisen and Sun, He and Li, Yangyan and Xu, Mu and Chen, Wenzheng and Chen, Baoquan},
  title     = {From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {391--402},
}
From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models: Yukang Feng,

Wenxiao Wu,

Jianwen Sun,

Chuanhao Li,

Fanrui Zhang,

Zizhen Li,

Jiaxin Ai,

Sizhuo Zhou,

Yifan Chang,

Changxin Gao,

Shenglin Zhang,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yukang and Wu, Wenxiao and Sun, Jianwen and Li, Chuanhao and Zhang, Fanrui and Li, Zizhen and Ai, Jiaxin and Zhou, Sizhuo and Chang, Yifan and Gao, Changxin and Zhang, Shenglin and Zhang, Kaipeng},
  title     = {From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2220--2229},
}
M^3A Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering: Jiayi Li,

Yuxuan Hu,

Haoran Geng,

Xiangyu Chen,

Chuhao Zhou,

Ziteng Cui,

Jianfei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiayi and Hu, Yuxuan and Geng, Haoran and Chen, Xiangyu and Zhou, Chuhao and Cui, Ziteng and Yang, Jianfei},
  title     = {{M$^3$A} Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3070--3081},
}
Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction: Xian-Hua Han; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Xian-Hua},
  title     = {Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1294--1303},
}
WGS: Watertight Geometry Standardization for Scalable 3D Generation: Dehao Hao,

Tanghui Jia,

Kaiyi Zhang,

Weikai Chen,

Zeyu Hu,

Yingda Yin,

Runze Zhang,

Lingting Zhu,

Li Yuan,

Xin Wang,

Long Quan; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Dehao and Jia, Tanghui and Zhang, Kaiyi and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Zhang, Runze and Zhu, Lingting and Yuan, Li and Wang, Xin and Quan, Long},
  title     = {{WGS}: Watertight Geometry Standardization for Scalable {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {569--578},
}
ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning: Boran Wang,

Xinming Wang,

Yi Chen,

Xiang Li,

Jian Xu,

Jing Yuan,

Cheng-Lin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Boran and Wang, Xinming and Chen, Yi and Li, Xiang and Xu, Jian and Yuan, Jing and Liu, Cheng-Lin},
  title     = {{ChartAgent}: A Chart Understanding Framework with Tool Integrated Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2773--2782},
}
Dual Strategies for Test-Time Adaptation: Nam Nguyen Phuong,

Duc Nguyen The Minh,

Phi Le Nguyen,

Ehsan Abbasnejad,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phuong_2026_CVPR,
  author    = {Phuong, Nam Nguyen and Minh, Duc Nguyen The and Le Nguyen, Phi and Abbasnejad, Ehsan and Hoai, Minh},
  title     = {Dual Strategies for Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2483--2492},
}
CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion: Akshit Jindal,

Saket Anand,

Chetan Arora,

Vikram Goyal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jindal_2026_CVPR,
  author    = {Jindal, Akshit and Anand, Saket and Arora, Chetan and Goyal, Vikram},
  title     = {{CLIP-Inspector}: Model-Level Backdoor Detection for Prompt-Tuned {CLIP} via {OOD} Trigger Inversion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {716--725},
}
The Mechanics of CNN Filtering with Rectification: Liam Frija-Altarac,

Matthew Toews; [pdf] [supp]
[bibtex]
@InProceedings{Frija-Altarac_2026_CVPR,
  author    = {Frija-Altarac, Liam and Toews, Matthew},
  title     = {The Mechanics of {CNN} Filtering with Rectification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1690--1699},
}
AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging: Zuchi Bazarvaani,

Seung-Ho Lee,

Jeongmin Ahn,

Donghyeon Jeon,

Inho Kang,

Seung-Hoon Na; [pdf]
[bibtex]
@InProceedings{Bazarvaani_2026_CVPR,
  author    = {Bazarvaani, Zuchi and Lee, Seung-Ho and Ahn, Jeongmin and Jeon, Donghyeon and Kang, Inho and Na, Seung-Hoon},
  title     = {{AlphaMerging}: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2700--2709},
}
Re^2MoGen: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement: Jiakun Zheng,

Ting Xiao,

Shiqin Cao,

Xinran Li,

Zhe Wang,

Chenjia Bai; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jiakun and Xiao, Ting and Cao, Shiqin and Li, Xinran and Wang, Zhe and Bai, Chenjia},
  title     = {{Re$^2$MoGen}: Open-Vocabulary Motion Generation via {LLM} Reasoning and Physics-Aware Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1378--1387},
}
Fast Generative DeOcclusion for Visual Geometry and Robotics: Jieneng Chen,

Tiezheng Zhang,

Xiwei Xuan,

Ju He,

Yifan Yin,

Haojun Shi,

Suyu Ye,

Xinyi Li,

Ruisheng Yuan,

Tianmin Shu,

Alan Yuille; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jieneng and Zhang, Tiezheng and Xuan, Xiwei and He, Ju and Yin, Yifan and Shi, Haojun and Ye, Suyu and Li, Xinyi and Yuan, Ruisheng and Shu, Tianmin and Yuille, Alan},
  title     = {Fast Generative {DeOcclusion} for Visual Geometry and Robotics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1314--1324},
}
LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map: Wei Luo,

Xiaohan Wang,

Yuehu Liu; [pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wei and Wang, Xiaohan and Liu, Yuehu},
  title     = {{LP3}: {LLM}-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1462--1471},
}
VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction: Yu Hu,

Chong Cheng,

Sicheng Yu,

Xiaoyang Guo,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yu and Cheng, Chong and Yu, Sicheng and Guo, Xiaoyang and Wang, Hao},
  title     = {{VGGT4D}: Mining Motion Cues in Visual Geometry Transformers for {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {414--424},
}
Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams: Kaiyuan Chen,

Shuangyu Xie,

Andrew Goldberg,

Ken Goldberg; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Kaiyuan and Xie, Shuangyu and Goldberg, Andrew and Goldberg, Ken},
  title     = {Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1452--1461},
}
Object Pose Transformer: Unifying Unseen Object Pose Estimation: Weihang Li,

Lorenzo Garattoni,

Fabien Despinoy,

Nassir Navab,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weihang and Garattoni, Lorenzo and Despinoy, Fabien and Navab, Nassir and Busam, Benjamin},
  title     = {Object Pose Transformer: Unifying Unseen Object Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {436--446},
}
Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance: Junyang Chen,

Haomin Ni,

Hanjiang Lai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junyang and Ni, Haomin and Lai, Hanjiang},
  title     = {Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {819--828},
}
Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining: Jie Xu,

Na Zhao; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jie and Zhao, Na},
  title     = {{Stream3D}: Streaming Zero-Shot {3D} Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {327--337},
}
Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments: Chihiro Noguchi,

Takaki Yamamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noguchi_2026_CVPR,
  author    = {Noguchi, Chihiro and Yamamoto, Takaki},
  title     = {Pseudo-Expert Regularized Offline {RL} for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1096--1105},
}
Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media: Megha Mariam K.M,

Vineeth N. Balasubramanian,

C.V. Jawahar; [pdf] [supp]
[bibtex]
@InProceedings{K.M_2026_CVPR,
  author    = {K.M, Megha Mariam and Balasubramanian, Vineeth N. and Jawahar, C. V.},
  title     = {Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2079--2088},
}
Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering: Powei Liao,

Jiro Abe,

Kazumine Ogura; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Powei and Abe, Jiro and Ogura, Kazumine},
  title     = {{Point2Gaussian}: Point-Cloud-to-{Gaussian} Conversion for Efficient {3D} Scene Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {109--118},
}
Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration: Yujie Chen,

Haotong Qin,

Zhang Zhang,

Michele Magno,

Luca Benini,

Yawei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yujie and Qin, Haotong and Zhang, Zhang and Magno, Michele and Benini, Luca and Li, Yawei},
  title     = {{Q-MambaIR}: Accurate Quantized {Mamba} for Efficient Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2524--2533},
}
Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP: Yusung Ro,

Jaehyun Choi,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ro_2026_CVPR,
  author    = {Ro, Yusung and Choi, Jaehyun and Kim, Junmo},
  title     = {Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for {CLIP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3252--3261},
}
Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs: Rongqian Chen,

Allison Andreyev,

Yanming Xiu,

Joshua Chilukuri,

Shunav Sen,

Mahdi Imani,

Bin Li,

Maria Gorlatova,

Gang Tan,

Tian Lan; [pdf]
[bibtex]
@InProceedings{chen_2026_CVPR,
  author    = {Chen, Rongqian and Andreyev, Allison and Xiu, Yanming and Chilukuri, Joshua and Sen, Shunav and Imani, Mahdi and Li, Bin and Gorlatova, Maria and Tan, Gang and Lan, Tian},
  title     = {Cognitive Attack Detection in Augmented Reality ({CADAR}): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {799--808},
}
WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting: Lezhong Wang,

Mehmet Onurcan Kaya,

Siavash Arjomand Bigdeli,

Jeppe Revall Frisvad; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lezhong and Kaya, Mehmet Onurcan and Bigdeli, Siavash Arjomand and Frisvad, Jeppe Revall},
  title     = {{WildRelight}: A Real-World Dataset and Benchmark for Single-Image Relighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2007--2016},
}
Context-Aware Semantic Segmentation via Stage-Wise Attention: Antoine Carreaud,

Elias Naha,

Arthur Chansel,

Nina Lahellec,

Jan Skaloud,

Adrien Gressin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Carreaud_2026_CVPR,
  author    = {Carreaud, Antoine and Naha, Elias and Chansel, Arthur and Lahellec, Nina and Skaloud, Jan and Gressin, Adrien},
  title     = {Context-Aware Semantic Segmentation via Stage-Wise Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2680--2690},
}
Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach: Xincheng Wang,

Hanchi Sun,

Wenjun Sun,

Kejun Xue,

Wangqiu Zhou,

Jianbo Zhang,

Wei Sun,

Dandan Zhu,

Xiongkuo Min,

Jun Jia,

Zhijun Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xincheng and Sun, Hanchi and Sun, Wenjun and Xue, Kejun and Zhou, Wangqiu and Zhang, Jianbo and Sun, Wei and Zhu, Dandan and Min, Xiongkuo and Jia, Jun and Fang, Zhijun},
  title     = {Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2230--2239},
}
Paper2SysArch: Structure-Constrained System Architecture Generation from Scientific Papers: Ziyi Guo,

Zhou Liu,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyi and Liu, Zhou and Zhang, Wentao},
  title     = {{Paper2SysArch}: Structure-Constrained System Architecture Generation from Scientific Papers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1996--2006},
}
OutSafe-Bench: A Benchmark for Multimodal Offensive Content Detection in Large Language Models: Yuping Yan,

Yuhan Xie,

Yuanshuai Li,

Yingchao Yu,

Lingjuan Lyu,

Yaochu Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yuping and Xie, Yuhan and Li, Yuanshuai and Yu, Yingchao and Lyu, Lingjuan and Jin, Yaochu},
  title     = {{OutSafe-Bench}: A Benchmark for Multimodal Offensive Content Detection in Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1965--1975},
}
ProGIC: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization: Hao Cao,

Chengbin Liang,

Wenqi Guo,

Zhijin Qin,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Hao and Liang, Chengbin and Guo, Wenqi and Qin, Zhijin and Han, Jungong},
  title     = {{ProGIC}: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2915--2925},
}
In2CLR: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection: Yunxuan Li,

Bohao Liu,

Yanxia Wu,

Rongsheng Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yunxuan and Liu, Bohao and Wu, Yanxia and Li, Rongsheng},
  title     = {{In2CLR}: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2398--2407},
}
Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator: Xiankang He,

Dongyan Guo,

Hongji Li,

Ying Cui,

Libo Weng,

Ruibo Li,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xiankang and Guo, Dongyan and Li, Hongji and Cui, Ying and Weng, Libo and Li, Ruibo and Zhang, Chi},
  title     = {Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {591--601},
}
PlanGS: Active 3D Gaussian Reconstruction with Real-Time Planning: Wenxiang Xie,

Anpei Chen,

Haoming Yu,

Yujun Shen,

Weiwei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Wenxiang and Chen, Anpei and Yu, Haoming and Shen, Yujun and Xu, Weiwei},
  title     = {{PlanGS}: Active {3D} {Gaussian} Reconstruction with Real-Time Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3156--3166},
}
Image Classification Using CNN-QNN Hybrid Model with Optimized Correlated Features: Minseo Seong,

Youngwook Kim; [pdf]
[bibtex]
@InProceedings{Seong_2026_CVPR,
  author    = {Seong, Minseo and Kim, Youngwook},
  title     = {Image Classification Using {CNN}-{QNN} Hybrid Model with Optimized Correlated Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2473--2482},
}
Name That Part: 3D Part Segmentation and Naming: Soumava Paul,

Prakhar Kaushik,

Ankit Vaidya,

Anand Bhattad,

Alan Yuille; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paul_2026_CVPR,
  author    = {Paul, Soumava and Kaushik, Prakhar and Vaidya, Ankit and Bhattad, Anand and Yuille, Alan},
  title     = {Name That Part: {3D} Part Segmentation and Naming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1808--1817},
}
FinChart-Multimodal: A Dataset for Context-Injected Financial Chart Understanding with Aligned OHLCV Time Series: Devansh Garg; [pdf]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Devansh},
  title     = {{FinChart-Multimodal}: A Dataset for Context-Injected Financial Chart Understanding with Aligned {OHLCV} Time Series},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1934--1942},
}
Intelligent Photo Retouching with Language Model-Based Artist Agents: Haoyu Chen,

Keda Tao,

YiZao Wang,

Xinlei Wang,

Lei Zhu,

Jinjin Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haoyu and Tao, Keda and Wang, YiZao and Wang, Xinlei and Zhu, Lei and Gu, Jinjin},
  title     = {Intelligent Photo Retouching with Language Model-Based Artist Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1240--1251},
}
A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action: Kaidong Zhang,

Jian Zhang,

Rongtao Xu,

Yu Sun,

Youpeng Wen,

Shuoshuo Xue,

Xiaoyu Guo,

Minghao Guo,

Weijia Liufu,

Liu Zihou,

Kangyi Ji,

Zihang Li,

Ruiyi Chen,

Meng Cao,

Jingming Zhang,

Shen Zhao,

Xiaojun Chang,

Feng Zheng,

Ivan Laptev,

Xiaodan Liang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaidong and Zhang, Jian and Xu, Rongtao and Sun, Yu and Wen, Youpeng and Xue, Shuoshuo and Guo, Xiaoyu and Guo, Minghao and Liufu, Weijia and Zihou, Liu and Ji, Kangyi and Li, Zihang and Chen, Ruiyi and Cao, Meng and Zhang, Jingming and Zhao, Shen and Chang, Xiaojun and Zheng, Feng and Laptev, Ivan and Liang, Xiaodan},
  title     = {A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1503--1514},
}
3DFA: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization: Sizhe Song,

Yankuan Chi,

Shuhan Zhong,

S.-H. Gary Chan; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Sizhe and Chi, Yankuan and Zhong, Shuhan and Chan, S.-H. Gary},
  title     = {{3DFA}: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {360--369},
}
Dyna-ViT: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers: Syeda Fiza Rubab,

Arslan Abdul Ghaffar,

Malik Junaid Jami Gul,

Sheriff Murtala,

Ingyu Lee,

Gyu Sang Choi; [pdf] [supp]
[bibtex]
@InProceedings{Rubab_2026_CVPR,
  author    = {Rubab, Syeda Fiza and Ghaffar, Arslan Abdul and Gul, Malik Junaid Jami and Murtala, Sheriff and Lee, Ingyu and Choi, Gyu Sang},
  title     = {{Dyna-ViT}: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2844--2851},
}
Phantasia: Context-Adaptive Backdoors in Vision Language Models: Nam Duong Tran,

Phi Le Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Nam Duong and Le Nguyen, Phi},
  title     = {Phantasia: Context-Adaptive Backdoors in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {695--704},
}
DR-DPO: Dual-Regularized DPO for Efficient Dataset Condensation: Haiduo Huang,

Jiangcheng Song,

Yadong Zhang,

Guansu Wang,

Pengju Ren; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Wang, Guansu and Ren, Pengju},
  title     = {{DR-DPO}: Dual-Regularized {DPO} for Efficient Dataset Condensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2111--2120},
}
Efficient Document Parsing via Parallel Token Prediction: Lei Li,

Ze Zhao,

Meng Li,

Zhongwang Lun,

Yi Yuan,

Xingjing Lu,

Zheng Wei,

Jiang Bian,

Zang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lei and Zhao, Ze and Li, Meng and Lun, Zhongwang and Yuan, Yi and Lu, Xingjing and Wei, Zheng and Bian, Jiang and Li, Zang},
  title     = {Efficient Document Parsing via Parallel Token Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2763--2772},
}
Texture-Guided Multiscale Cross-Modal Fusion for AI-Generated Image Quality Assessment: Qinlin Hu,

Mingliang Zhou,

Xingran Liao; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Qinlin and Zhou, Mingliang and Liao, Xingran},
  title     = {Texture-Guided Multiscale Cross-Modal Fusion for {AI}-Generated Image Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2629--2638},
}
NSGuard: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation: Lizhi Xiong,

Jianguo Feng,

Ziqiang Li,

Jun Li,

Weiwei Jiang,

Zhangjie Fu; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Lizhi and Feng, Jianguo and Li, Ziqiang and Li, Jun and Jiang, Weiwei and Fu, Zhangjie},
  title     = {{NSGuard}: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {675--684},
}
4D E-SloMo: 4D Reconstruction for High Speed Scene using a Hybrid RGB-Event Multi-View System: Bo Xu,

Jun Dai,

Yutian Chen,

Linning Xu,

Mulin Yu,

Yujin Wang,

Shi Guo,

Xinyi Le,

Tianfan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Bo and Dai, Jun and Chen, Yutian and Xu, Linning and Yu, Mulin and Wang, Yujin and Guo, Shi and Le, Xinyi and Xue, Tianfan},
  title     = {{4D} {E-SloMo}: {4D} Reconstruction for High Speed Scene using a Hybrid {RGB}-Event Multi-View System},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {43--53},
}
ReaAct: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization: Yanzhao Yu,

Yi Ding,

Peijun Tang,

Haotian Yang,

Xianbiao Qi,

Jianan Wang,

Xueqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yanzhao and Ding, Yi and Tang, Peijun and Yang, Haotian and Qi, Xianbiao and Wang, Jianan and Wang, Xueqian},
  title     = {{ReaAct}: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1441--1451},
}
Generalizable Human Gaussian Splatting via Multi-view Semantic Consistency: Jingi Kim,

Wonjun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jingi and Kim, Wonjun},
  title     = {Generalizable Human {Gaussian} Splatting via Multi-view Semantic Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {139--148},
}
Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity 3D Reconstruction in Autonomous Driving: Mingbo Dai,

Han Yan,

Bolun Zhang,

Wu Ran,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Mingbo and Yan, Han and Zhang, Bolun and Ran, Wu and Ma, Chao},
  title     = {Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity {3D} Reconstruction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {181--190},
}
MolRecBench-Wild: A Real-World Benchmark for Optical Chemical Structure Recognition: Haote Yang,

Hui Wang,

Chen Zhu,

Jingchao Wang,

Linye Li,

Hongbin Lai,

Huijie Ao,

Yongxuan Lv,

Jiang Wu,

Jiaxing Sun,

Lua Chen,

Yuanyuan Cao,

Ruijie Zhang,

Shengxin Lu,

Lijun Wu,

Bin Wang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haote and Wang, Hui and Zhu, Chen and Wang, Jingchao and Li, Linye and Lai, Hongbin and Ao, Huijie and Lv, Yongxuan and Wu, Jiang and Sun, Jiaxing and Chen, Lua and Cao, Yuanyuan and Zhang, Ruijie and Lu, Shengxin and Wu, Lijun and Wang, Bin and He, Conghui},
  title     = {{MolRecBench-Wild}: A Real-World Benchmark for Optical Chemical Structure Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1924--1933},
}
Latent Domain Modeling Improves Robustness to Geographic Shifts: Ruth Crasto,

Esther Rolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Crasto_2026_CVPR,
  author    = {Crasto, Ruth and Rolf, Esther},
  title     = {Latent Domain Modeling Improves Robustness to Geographic Shifts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2419--2428},
}
MADrive: Memory-Augmented Driving Scene Modeling: Polina Karpikova,

Daniil Selikhanovych,

Kirill Struminsky,

Ruslan Musaev,

Maria Golitsyna,

Dmitry Baranchuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karpikova_2026_CVPR,
  author    = {Karpikova, Polina and Selikhanovych, Daniil and Struminsky, Kirill and Musaev, Ruslan and Golitsyna, Maria and Baranchuk, Dmitry},
  title     = {{MADrive}: Memory-Augmented Driving Scene Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {54--65},
}
INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models: Parsa Madinei,

Ryan Solgi,

Ziqi Wen,

Jonathan Skaza,

Miguel Eckstein,

Ramtin Pedarsani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Madinei_2026_CVPR,
  author    = {Madinei, Parsa and Solgi, Ryan and Wen, Ziqi and Skaza, Jonathan and Eckstein, Miguel and Pedarsani, Ramtin},
  title     = {{INTERLACE}: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2947--2956},
}
Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning: Kyle Domico,

Jean-Charles Noirot Ferrand,

Ryan Sheatsley,

Eric Pauley,

Josiah Hanna,

Patrick McDaniel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Domico_2026_CVPR,
  author    = {Domico, Kyle and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Pauley, Eric and Hanna, Josiah and McDaniel, Patrick},
  title     = {Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {647--655},
}
3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction: Beiyuan Zhang,

Hesong Li,

Ruiwen Shao,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Beiyuan and Li, Hesong and Shao, Ruiwen and Fu, Ying},
  title     = {{3D} {Gaussian} Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {306--315},
}
Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework: Yilin Wang,

Dawei Luo,

Shuai Chen,

Feng Xu,

Jiachi Wang,

Zunlei Feng,

Yijun Bei; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yilin and Luo, Dawei and Chen, Shuai and Xu, Feng and Wang, Jiachi and Feng, Zunlei and Bei, Yijun},
  title     = {Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2314--2323},
}
Enriching Knowledge Distillation with Cross-Modal Teacher Fusion: Amir M. Mansourian,

Amir Mohammad Babaei,

Shohreh Kasaei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mansourian_2026_CVPR,
  author    = {Mansourian, Amir M. and Babaei, Amir Mohammad and Kasaei, Shohreh},
  title     = {Enriching Knowledge Distillation with Cross-Modal Teacher Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2893--2903},
}
Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection: Yan Li,

Zhouchao Fu,

Wenbin Lu,

Junjie Zheng,

Junnan Xu,

Junjie Liao,

Jianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Fu, Zhouchao and Lu, Wenbin and Zheng, Junjie and Xu, Junnan and Liao, Junjie and Zheng, Jianwei},
  title     = {{Riemannian} Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1546--1555}
}
MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training: Zhenhan Yin,

Xuanhan Wang,

Jiahao Jiang,

Kaiyuan Deng,

Pengqi Chen,

Shuangle Li,

Chong Liu,

Xing Xu,

Jingkuan Song,

Lianli Gao,

Heng Tao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Zhenhan and Wang, Xuanhan and Jiang, Jiahao and Deng, Kaiyuan and Chen, Pengqi and Li, Shuangle and Liu, Chong and Xu, Xing and Song, Jingkuan and Gao, Lianli and Shen, Heng Tao},
  title     = {{MiVLA}: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1535--1545}
}
3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework: Tobias Sautter,

Jan-Niklas Dihlmann,

Hendrik P. A. Lensch; [pdf] [arXiv]
[bibtex]
@InProceedings{Sautter_2026_CVPR,
  author    = {Sautter, Tobias and Dihlmann, Jan-Niklas and Lensch, Hendrik P. A.},
  title     = {{3D-RE-GEN}: {3D} Reconstruction of Indoor Scenes with a Generative Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {528--537}
}
GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes: Beibei Lin,

Xiao Cao,

Jingyuan Guo,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Beibei and Cao, Xiao and Guo, Jingyuan and Tan, Robby T.},
  title     = {{GlowGS}: Generative Semantic Feature Learning for {3D} {Gaussian} Splatting in Nighttime Glow Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {275--284}
}
Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving: Zhexi Lian,

Haoran Wang,

Xuerun Yan,

Weimeng Lin,

Xianhong Zhang,

Yongyu Chen,

Jia Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Zhexi and Wang, Haoran and Yan, Xuerun and Lin, Weimeng and Zhang, Xianhong and Chen, Yongyu and Hu, Jia},
  title     = {Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {920--930}
}
From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation: Rafael Pablos Sarabia,

Joachim Nyborg,

Morten Birk,

Ira Assent; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarabia_2026_CVPR,
  author    = {Sarabia, Rafael Pablos and Nyborg, Joachim and Birk, Morten and Assent, Ira},
  title     = {From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2606--2617}
}
Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use: Ding Yi,

Yanzhao Yu,

Xili Dai,

Xianbiao Qi,

Peiwen Sun,

Xueqian Wang,

Xiangyu Yue,

Jianan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ding and Yu, Yanzhao and Dai, Xili and Qi, Xianbiao and Sun, Peiwen and Wang, Xueqian and Yue, Xiangyu and Wang, Jianan},
  title     = {Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1346--1357}
}
EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks: Lulin Liu,

Dayou Li,

Yiqing Liang,

Sicong Jiang,

Hitesh Vijay,

Hezhen Hu,

Xuhai Xu,

Zirui Liu,

Srinivas Shakkottai,

Manling Li,

Zhiwen Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lulin and Li, Dayou and Liang, Yiqing and Jiang, Sicong and Vijay, Hitesh and Hu, Hezhen and Xu, Xuhai and Liu, Zirui and Shakkottai, Srinivas and Li, Manling and Fan, Zhiwen},
  title     = {{EgoTL}: Egocentric Think-Aloud Chains for Long-Horizon Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2017--2027}
}
Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling: Pengfei Yang,

Sifu Luo,

Feng Wu,

Fan Zhou,

Ting Zhong; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Pengfei and Luo, Sifu and Wu, Feng and Zhou, Fan and Zhong, Ting},
  title     = {Variable-View Diffusion with Geometric Uncertainty Unlocks {LiDAR} Upsampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1148--1158}
}
VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving: Levente Tempfli,

Esteban Rivera,

Markus Lienkamp; [pdf] [supp]
[bibtex]
@InProceedings{Tempfli_2026_CVPR,
  author    = {Tempfli, Levente and Rivera, Esteban and Lienkamp, Markus},
  title     = {{VESPA}: Open-World Auto-Labeling for {3D} Object Detection in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {960--969}
}
Event-Based Optical Flow Leveraging Precise Event Timing: Hugh Greatorex,

Elisabetta Chicca; [pdf] [supp]
[bibtex]
@InProceedings{Greatorex_2026_CVPR,
  author    = {Greatorex, Hugh and Chicca, Elisabetta},
  title     = {Event-Based Optical Flow Leveraging Precise Event Timing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3178--3188}
}
Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering: Noah Frahm,

Prakrut Patel,

Yue Zhang,

Shoubin Yu,

Mohit Bansal,

Roni Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Frahm_2026_CVPR,
  author    = {Frahm, Noah and Patel, Prakrut and Zhang, Yue and Yu, Shoubin and Bansal, Mohit and Sengupta, Roni},
  title     = {Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3114--3123}
}
MPM: Mutual Pair Merging for Efficient Vision Transformers: Simon Ravé,

Pejman Rasti,

David Rousseau; [pdf] [supp]
[bibtex]
@InProceedings{Rave_2026_CVPR,
  author    = {Rav{\'e}, Simon and Rasti, Pejman and Rousseau, David},
  title     = {{MPM}: Mutual Pair Merging for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2998--3008}
}
Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior: Jiaying Lin,

Shuquan Ye,

Dan Xu,

Wanli Ouyang,

Rynson W. H. Lau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiaying and Ye, Shuquan and Xu, Dan and Ouyang, Wanli and Lau, Rynson W. H.},
  title     = {Do {MLLMs} Exhibit Human-like Perceptual Behaviors? {HVSBench}: A Benchmark for {MLLM} Alignment with Human Perceptual Behavior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1818--1827}
}
AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning: Qilin Xiang,

Qilin Fan,

Xinrui Li,

Tianfu Wang,

Shuting Qiu,

Yue Niu; [pdf]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Qilin and Fan, Qilin and Li, Xinrui and Wang, Tianfu and Qiu, Shuting and Niu, Yue},
  title     = {{AlignFL}: Adaptive Learning and Intelligent Generation of Networks for Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3019--3028}
}
InstructTable: Improving Table Structure Recognition Through Instruction: Boming Chen,

Zining Wang,

Zhentao Guo,

Jianqiang Liu,

Chen Duan,

Yu Gu,

Kai Zhou,

Pengfei Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Boming and Wang, Zining and Guo, Zhentao and Liu, Jianqiang and Duan, Chen and Gu, Yu and Zhou, Kai and Yan, Pengfei},
  title     = {{InstructTable}: Improving Table Structure Recognition Through Instruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2742--2752}
}
AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models: Jintao Lin,

Bowen Dong,

Weikang Shi,

Chenyang Lei,

Suiyun Zhang,

Rui Liu,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jintao and Dong, Bowen and Shi, Weikang and Lei, Chenyang and Zhang, Suiyun and Liu, Rui and Liu, Xihui},
  title     = {{AEGIS}: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1797--1807}
}
LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets: Woo-Jin Jung,

Dong-Hee Paek,

Seung-Hyun Kong; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Woo-Jin and Paek, Dong-Hee and Kong, Seung-Hyun},
  title     = {{LiDAR}-to-{4D} Radar Synthesis for Building Large-Scale Tensor Datasets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {889--899}
}
DINO-VO: Learning Where to Focus for Enhanced State Estimation: Qi Chen,

Guanghao Li,

Sijia Hu,

Xin Gao,

Junpeng Ma,

Xiangyang Xue,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Qi and Li, Guanghao and Hu, Sijia and Gao, Xin and Ma, Junpeng and Xue, Xiangyang and Pu, Jian},
  title     = {{DINO-VO}: Learning Where to Focus for Enhanced State Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1556--1566}
}
GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis: Thomas Tanay,

Mohammed Brahimi,

Michal Nazarczuk,

Qingwen Zhang,

Sibi Catley-Chandar,

Arthur Moreau,

Zhensong Zhang,

Eduardo Pérez-Pellitero; [pdf] [arXiv]
[bibtex]
@InProceedings{Tanay_2026_CVPR,
  author    = {Tanay, Thomas and Brahimi, Mohammed and Nazarczuk, Michal and Zhang, Qingwen and Catley-Chandar, Sibi and Moreau, Arthur and Zhang, Zhensong and P{\'e}rez-Pellitero, Eduardo},
  title     = {{GRVS}: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {348--359}
}
The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning: Garima Arya Yadav,

Nilay Yilmaz,

Yezhou Yang; [pdf] [supp]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Garima Arya and Yilmaz, Nilay and Yang, Yezhou},
  title     = {The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2069--2078}
}
SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction: Kang Han,

Wei Xiang,

Lu Yu,

Mathew Wyatt,

Gaowen Liu,

Ramana Rao Kompella; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Kang and Xiang, Wei and Yu, Lu and Wyatt, Mathew and Liu, Gaowen and Kompella, Ramana Rao},
  title     = {{SwiftNDC}: Fast Neural Depth Correction for High-Fidelity {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {12--21}
}
SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration: Zhimin Shao,

Abhay Yadav,

Rama Chellappa,

Cheng Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Zhimin and Yadav, Abhay and Chellappa, Rama and Peng, Cheng},
  title     = {{SPIDER}: Spatial Image {CorresponDence} Estimator for Robust Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {253--263}
}
Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data: Dumindu Tissera,

Omar Awadallah,

Muhammad Umair Danish,

Ayan Sadhu,

Katarina Grolinger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tissera_2026_CVPR,
  author    = {Tissera, Dumindu and Awadallah, Omar and Danish, Muhammad Umair and Sadhu, Ayan and Grolinger, Katarina},
  title     = {Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2429--2439}
}
ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph: Junhao Cai,

Deyu Zeng,

Junhao Pang,

Lini Li,

Xiaopin Zhong,

Zongze Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Junhao and Zeng, Deyu and Pang, Junhao and Li, Lini and Zhong, Xiaopin and Wu, Zongze},
  title     = {{ForgeDreamer}: Industrial Text-to-{3D} Generation with Multi-Expert {LoRA} and Cross-View Hypergraph},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {295--305}
}
UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics: Joseph Raj Vishal,

Nagasiri Poluri,

Katha Naik,

Rutuja Patil,

Kashyap Hegde Kota,

Krishna Vinod,

Prithvi Jai Ramesh,

Mohammad Farhadi,

Yezhou Yang,

Bharatesh Chakravarthi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vishal_2026_CVPR,
  author    = {Vishal, Joseph Raj and Poluri, Nagasiri and Naik, Katha and Patil, Rutuja and Kota, Kashyap Hegde and Vinod, Krishna and Ramesh, Prithvi Jai and Farhadi, Mohammad and Yang, Yezhou and Chakravarthi, Bharatesh},
  title     = {{UDVideoQA}: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1862--1871}
}
MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments: Zhiyu Huang,

Zewei Zhou,

Tianhui Cai,

Yun Zhang,

Jiaqi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhiyu and Zhou, Zewei and Cai, Tianhui and Zhang, Yun and Ma, Jiaqi},
  title     = {{MDG}: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {878--888}
}
MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs: Zhilin Lin,

Zhihui Zhang,

Shiliang Sun,

Jing Zhao,

Hao Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zhilin and Zhang, Zhihui and Sun, Shiliang and Zhao, Jing and Yang, Hao},
  title     = {{MathAll}: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-{MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2089--2099}
}
Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM: Cabrel Wouladje,

Golden Tendekai Mumanikidzwa,

Md Apon Islam,

Huiying Xu,

Hongbo Li,

Wenzhe Tan,

Zhendong Chen,

Xinzhong Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wouladje_2026_CVPR,
  author    = {Wouladje, Cabrel and Mumanikidzwa, Golden Tendekai and Islam, Md Apon and Xu, Huiying and Li, Hongbo and Tan, Wenzhe and Chen, Zhendong and Zhu, Xinzhong},
  title     = {Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1567--1576}
}
Instant Colorization of Gaussian Splats: Daniel Lieber,

Alexander Mock,

Nils Wandel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lieber_2026_CVPR,
  author    = {Lieber, Daniel and Mock, Alexander and Wandel, Nils},
  title     = {Instant Colorization of {Gaussian} Splats},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {170--180}
}
Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning: Yibing Wei,

Sudeep Katakol,

Manuel Brack,

Jinhong Lin,

Haoyue Bai,

Yu-Teng Li,

Richard Zhang,

Eli Shechtman,

Hareesh Ravi,

Ajinkya Kale; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yibing and Katakol, Sudeep and Brack, Manuel and Lin, Jinhong and Bai, Haoyue and Li, Yu-Teng and Zhang, Richard and Shechtman, Eli and Ravi, Hareesh and Kale, Ajinkya},
  title     = {Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1883--1892}
}
Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment: Huaiyuan Qin,

Muli Yang,

Gabriel James Goenawan,

Kai Wang,

Zheng Wang,

Peng Hu,

Xi Peng,

Hongyuan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Huaiyuan and Yang, Muli and Goenawan, Gabriel James and Wang, Kai and Wang, Zheng and Hu, Peng and Peng, Xi and Zhu, Hongyuan},
  title     = {Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3029--3039}
}
Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams: Zhenghui Guo,

Yuanbin Man,

Junyuan Sheng,

Bowen Lin,

Ahmed Ahmed,

Bo Jiang,

Boyuan Zhang,

Miao Yin,

Sian Jin,

Omprakash Gnawali,

Chengming Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Zhenghui and Man, Yuanbin and Sheng, Junyuan and Lin, Bowen and Ahmed, Ahmed and Jiang, Bo and Zhang, Boyuan and Yin, Miao and Jin, Sian and Gnawali, Omprakash and Zhang, Chengming},
  title     = {{Event-VStream}: Event-Driven Real-Time Understanding for Long Video Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3060--3069}
}
LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images: Yilong Liu,

Wanhua Li,

Chen Zhu-Tian,

Hanspeter Pfister; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yilong and Li, Wanhua and Chen, Zhu-Tian and Pfister, Hanspeter},
  title     = {{LangFlash}: Feed-forward {3D} Language {Gaussian} Splatting from Sparse Unposed Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {191--201}
}
World Model Robustness via Surprise Recognition: Geigh Zollicoffer,

Tanush Chopra,

Mingkuan Yan,

Xiaoxu Ma,

Kenneth Eaton,

Mark Riedl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zollicoffer_2026_CVPR,
  author    = {Zollicoffer, Geigh and Chopra, Tanush and Yan, Mingkuan and Ma, Xiaoxu and Eaton, Kenneth and Riedl, Mark},
  title     = {World Model Robustness via Surprise Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3146--3155}
}
U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration: Michael Smith,

Frank P. Ferrie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Smith_2026_CVPR,
  author    = {Smith, Michael and Ferrie, Frank P.},
  title     = {{U-SEG}: Uncertainty in {SEGmentation} - A systematic multi-variable exploration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1850--1861}
}
Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety: Younggun Kim,

Sirnam Swetha,

Fazil Kagdi,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Younggun and Swetha, Sirnam and Kagdi, Fazil and Shah, Mubarak},
  title     = {{Safe-LLaVA}: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2100--2110}
}
PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models: Jinkai Li,

Zhenliang Zhang,

Lifeng Fan,

Wei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinkai and Zhang, Zhenliang and Fan, Lifeng and Wang, Wei},
  title     = {{PureSpace}: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1976--1985}
}
LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers: Md Abtahi Majeed Chowdhury,

Md Rifat Ur Rahman,

Akil Ahmad Taki; [pdf] [supp]
[bibtex]
@InProceedings{Chowdhury_2026_CVPR,
  author    = {Chowdhury, Md Abtahi Majeed and Rahman, Md Rifat Ur and Taki, Akil Ahmad},
  title     = {{LOOPE}: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1640--1649}
}
Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression: Haihao Zhang,

Siwei Dong,

Jianing Li,

Rui Zhao,

Yunjian Zhang,

Geng Qin,

Lin Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haihao and Dong, Siwei and Li, Jianing and Zhao, Rui and Zhang, Yunjian and Qin, Geng and Zhu, Lin},
  title     = {Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3220--3230}
}
SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense: Patryk Krukowski,

Lukasz Gorczyca,

Piotr Helm,

Kamil Ksiazek,

Przemyslaw Spurek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krukowski_2026_CVPR,
  author    = {Krukowski, Patryk and Gorczyca, Lukasz and Helm, Piotr and Ksiazek, Kamil and Spurek, Przemyslaw},
  title     = {{SHIELD}: Secure Hypernetworks for Incremental Expansion Learning Defense},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2377--2386}
}
MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection: Xinying Li,

Junfeng Jing,

Tong Wu,

Tian Gao,

Zhihong Sheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinying and Jing, Junfeng and Wu, Tong and Gao, Tian and Sheng, Zhihong},
  title     = {{MegAD}: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2585--2595}
}
NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks: Pengcheng Chen,

Yue Hu,

Wenhao Li,

Nicole M Gunderson,

Andrew Feng,

Zhenglong Sun,

Peter Beerel,

Eric J Seibel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengcheng and Hu, Yue and Li, Wenhao and Gunderson, Nicole M. and Feng, Andrew and Sun, Zhenglong and Beerel, Peter and Seibel, Eric J.},
  title     = {{NeVStereo}: A {NeRF}-Driven {NVS}-Stereo Architecture for High-Fidelity {3D} Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {403--413}
}
FLToM: Robust Federated Learning with Theory-of-Mind Structure: Tianshu Xiao,

Liu Yang,

Sichang Guo,

Qilong Wang,

Qinghua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Tianshu and Yang, Liu and Guo, Sichang and Wang, Qilong and Hu, Qinghua},
  title     = {{FLToM}: Robust Federated Learning with Theory-of-Mind Structure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2503--2513}
}
RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes: Michael Baltaxe,

Dan Levi,

Sagie Benaim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baltaxe_2026_CVPR,
  author    = {Baltaxe, Michael and Levi, Dan and Benaim, Sagie},
  title     = {{RAD}: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {558--568}
}
G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction: Chao Ning,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Chao and Yokoya, Naoto},
  title     = {{G2I}: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {518--527}
}
Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents: Nishad Singhi,

Christian Bialas,

Snehal Jauhri,

Vignesh Prasad,

Georgia Chalvatzaki,

Marcus Rohrbach,

Anna Rohrbach; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singhi_2026_CVPR,
  author    = {Singhi, Nishad and Bialas, Christian and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia and Rohrbach, Marcus and Rohrbach, Anna},
  title     = {Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3124--3135}
}; Back