Findings

Back

Spatial Transcriptomics as Images for Large-Scale Pretraining
Yishun Zhu,
Jiaxin Qi,
Jian Wang,
Yuhua Zheng,
Jianqiang Huang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yishun and Qi, Jiaxin and Wang, Jian and Zheng, Yuhua and Huang, Jianqiang},
  title     = {Spatial Transcriptomics as Images for Large-Scale Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1191--1200}
}

GEAR: GEometry-Motion Alternating Refinement for Articulated Object Modeling with Gaussian Splatting
Jialin Li,
Bin Fu,
Ruiping Wang,
Xilin Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jialin and Fu, Bin and Wang, Ruiping and Chen, Xilin},
  title     = {{GEAR}: {GEometry-Motion} Alternating Refinement for Articulated Object Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {264--274}
}

Mix-to-Max: Optimizing Data Mixtures for Peak Vision-Language Efficiency
Erwei Zhao,
Haijin Zeng,
Weiwei Xiao,
Shijie Cao,
Qiben Shan,
Shaocong Wu,
Jingyong Su,
Jie Liu
[pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Erwei and Zeng, Haijin and Xiao, Weiwei and Cao, Shijie and Shan, Qiben and Wu, Shaocong and Su, Jingyong and Liu, Jie},
  title     = {{Mix-to-Max}: Optimizing Data Mixtures for Peak Vision-Language Efficiency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2937--2946}
}

AdaPerceiver: Transformers with Adaptive Width, Depth, and Tokens
Purvish Jajal,
Nicholas John Eliopoulos,
Benjamin Shiue-Hal Chou,
George K Thiruvathukal,
Yung-Hsiang Lu,
James C. Davis
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jajal_2026_CVPR,
  author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K and Lu, Yung-Hsiang and Davis, James C.},
  title     = {{AdaPerceiver}: Transformers with Adaptive Width, Depth, and Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2618--2628}
}

Eigen-Value: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach
Youngjun Choi,
Joonseong Kang,
Sungjun Lim,
Kyungwoo Song
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Youngjun and Kang, Joonseong and Lim, Sungjun and Song, Kyungwoo},
  title     = {{Eigen-Value}: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2387--2397}
}

CoTFly: Making UAVs Think Where to Fly Next Through Visual Chain-of-Thought Reasoning
Meiqi Wang,
Longnyu Xu,
Jun Liu,
Hewu Li,
Han Qiu
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meiqi and Xu, Longnyu and Liu, Jun and Li, Hewu and Qiu, Han},
  title     = {{CoTFly}: Making {UAVs} Think Where to Fly Next Through Visual Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1482--1491}
}

AD-R1: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models
Tianyi Yan,
Tao Tang,
Xingtai Gui,
Yongkang Li,
Jiasen Zheng,
Weiyao Huang,
Lingdong Kong,
Wencheng Han,
Xia Zhou,
Xueyang Zhang,
Yifei Zhan,
Kun Zhan,
Cheng-zhong Xu,
Jianbing Shen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Tianyi and Tang, Tao and Gui, Xingtai and Li, Yongkang and Zheng, Jiasen and Huang, Weiyao and Kong, Lingdong and Han, Wencheng and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{AD-R1}: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1085--1095}
}

What Matters for Scalable and Robust Learning in End-to-End Driving Planners?
David Holtz,
Niklas Hanselmann,
Simon Doll,
Marius Cordts,
Bernt Schiele
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Holtz_2026_CVPR,
  author    = {Holtz, David and Hanselmann, Niklas and Doll, Simon and Cordts, Marius and Schiele, Bernt},
  title     = {What Matters for Scalable and Robust Learning in End-to-End Driving Planners?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {931--941}
}

Active Exploration for Sparse Visual Localization
Johanna Lidholm,
Ludvig Dillén,
Zuzana Kukelova,
Torsten Sattler,
Viktor Larsson
[pdf] [supp]
[bibtex]
@InProceedings{Lidholm_2026_CVPR,
  author    = {Lidholm, Johanna and Dill{\'e}n, Ludvig and Kukelova, Zuzana and Sattler, Torsten and Larsson, Viktor},
  title     = {Active Exploration for Sparse Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {338--347}
}

IDSplat: Instance-Decomposed 3D Gaussian Splatting for Driving Scenes
Carl Lindström,
Mahan Rafidashti,
Maryam Fatemi,
Lars Hammarstrand,
Martin R. Oswald,
Lennart Svensson
[pdf] [supp]
[bibtex]
@InProceedings{Lindstrom_2026_CVPR,
  author    = {Lindstr{\"o}m, Carl and Rafidashti, Mahan and Fatemi, Maryam and Hammarstrand, Lars and Oswald, Martin R. and Svensson, Lennart},
  title     = {{IDSplat}: Instance-Decomposed {3D} {Gaussian} Splatting for Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {316--326}
}

MapGPT: A Vision-Language Model for Large-Scale High-Definition Map Generation
Mengxi Wu,
Long Zhou,
Zhixia Li,
Adrian Kwan,
Denis Laprise,
Hengyi Huang,
Xiaqing Wu,
Shuang Wu
[pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mengxi and Zhou, Long and Li, Zhixia and Kwan, Adrian and Laprise, Denis and Huang, Hengyi and Wu, Xiaqing and Wu, Shuang},
  title     = {{MapGPT}: A Vision-Language Model for Large-Scale High-Definition Map Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {990--999}
}

SPOT: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers
Oded Schlesinger,
Amirhossein Farzam,
J. Matias Di Martino,
Guillermo Sapiro
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schlesinger_2026_CVPR,
  author    = {Schlesinger, Oded and Farzam, Amirhossein and Di Martino, J. Matias and Sapiro, Guillermo},
  title     = {{SPOT}: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2335--2345}
}

CPUBone: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities
Moritz Nottebaum,
Matteo Dunnhofer,
Christian Micheloni
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nottebaum_2026_CVPR,
  author    = {Nottebaum, Moritz and Dunnhofer, Matteo and Micheloni, Christian},
  title     = {{CPUBone}: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2493--2502}
}

2D Triangle Splatting for Direct Differentiable Mesh Training
Kaifeng Sheng,
Zheng Zhou,
Yingliang Peng,
Qianwei Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Kaifeng and Zhou, Zheng and Peng, Yingliang and Wang, Qianwei},
  title     = {{2D} Triangle Splatting for Direct Differentiable Mesh Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {285--294}
}

DrawingVQA: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings
Yoonhwa Jung,
Junryu Fu,
Mani Golparvar-Fard
[pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Yoonhwa and Fu, Junryu and Golparvar-Fard, Mani},
  title     = {{DrawingVQA}: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2121--2130}
}

Debiased One-Shot NAS Via Density-Aware Sampling
Mehraveh Javan Roshtkhari,
Matthew Toews,
Marco Pedersoli
[pdf] [supp]
[bibtex]
@InProceedings{Roshtkhari_2026_CVPR,
  author    = {Roshtkhari, Mehraveh Javan and Toews, Matthew and Pedersoli, Marco},
  title     = {Debiased One-Shot {NAS} Via Density-Aware Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2357--2366}
}

SLAD: Shared LoRA Adapters for Task Specific Distillation
Reda Bensaid,
Yassir Bendou,
Vincent Gripon,
François Leduc-Primeau
[pdf] [supp]
[bibtex]
@InProceedings{Bensaid_2026_CVPR,
  author    = {Bensaid, Reda and Bendou, Yassir and Gripon, Vincent and Leduc-Primeau, Fran{\c{c}}ois},
  title     = {{SLAD}: Shared {LoRA} Adapters for Task Specific Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2968--2977}
}

OnlineX: Unified Online 3D Reconstruction and Understanding with Active-to-Stable State Evolution
Chong Xia,
Fangfu Liu,
Yule Wang,
Yize Pang,
Yueqi Duan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Liu, Fangfu and Wang, Yule and Pang, Yize and Duan, Yueqi},
  title     = {{OnlineX}: Unified Online {3D} Reconstruction and Understanding with Active-to-Stable State Evolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {66--76}
}

AndroidLong: LLM-based Android Agents Struggle with Long Looping Tasks
Xinghan Liu,
Xiao Liu,
Yifan Xu,
Jiaqi Fu,
Jiayu Huang,
Yixuan Liu,
Yuxiao Dong,
Jie Tang
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinghan and Liu, Xiao and Xu, Yifan and Fu, Jiaqi and Huang, Jiayu and Liu, Yixuan and Dong, Yuxiao and Tang, Jie},
  title     = {{AndroidLong}: {LLM-based} {Android} Agents Struggle with Long Looping Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1700--1710}
}

CTFS: Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels
Ping Guo,
Chengzhou Li,
Guanchen Meng,
Qi Jia,
Jinyuan Liu,
Zhu Liu,
Yu Liu,
Zhongxuan Luo,
Xin Fan
[pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ping and Li, Chengzhou and Meng, Guanchen and Jia, Qi and Liu, Jinyuan and Liu, Zhu and Liu, Yu and Luo, Zhongxuan and Fan, Xin},
  title     = {{CTFS}: Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1660--1669}
}

Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models
Shan Zhao,
Zhao Yang,
Tianwei Yan,
Yusong Gong,
Qian Wan,
Shizhao Chen,
Shezheng Song,
Chengyu Wang,
Meng Wang
[pdf]
[bibtex]
% Key disambiguated: Zhao_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Zhao_2026_CVPR_SeeingTheAbstract,
  author    = {Zhao, Shan and Yang, Zhao and Yan, Tianwei and Gong, Yusong and Wan, Qian and Chen, Shizhao and Song, Shezheng and Wang, Chengyu and Wang, Meng},
  title     = {Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2304--2313}
}

KnowMTP: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving
Rufan Bai,
Tianyi Xue,
Tiantian Zhou,
Weiwei Wu,
Changle Li,
Yuhuan Lu
[pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Rufan and Xue, Tianyi and Zhou, Tiantian and Wu, Weiwei and Li, Changle and Lu, Yuhuan},
  title     = {{KnowMTP}: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {980--989}
}

GRADE: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution
Zehong Ke,
Zhiyuan Liu,
Yuning Wang,
Jinhao Li,
Junkai Jiang,
Yanbo Jiang,
Zhenhua Xu,
Jianqiang Wang
[pdf] [supp]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Zehong and Liu, Zhiyuan and Wang, Yuning and Li, Jinhao and Jiang, Junkai and Jiang, Yanbo and Xu, Zhenhua and Wang, Jianqiang},
  title     = {{GRADE}: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1029--1038}
}

A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models
Duo Li,
Zuhao Yang,
Xiaoqin Zhang,
Ling Shao,
Shijian Lu
[pdf] [supp] [arXiv]
[bibtex]
% Key disambiguated: Li_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Li_2026_CVPR_VisualTokenRedundancy,
  author    = {Li, Duo and Yang, Zuhao and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2823--2833}
}

PDF-GS: Progressive Distractor Filtering for Robust 3D Gaussian Splatting
Kangmin Seo,
MinKyu Lee,
Tae-Young Kim,
ByeongCheol Lee,
JoonSeoung An,
Jae-Pil Heo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Kangmin and Lee, MinKyu and Kim, Tae-Young and Lee, ByeongCheol and An, JoonSeoung and Heo, Jae-Pil},
  title     = {{PDF-GS}: Progressive Distractor Filtering for Robust {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {468--477}
}

FineCog-Nav: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal UAV Navigation
Dian Shao,
Zhengzheng Xu,
Peiyang Wang,
Like Liu,
Yule Wang,
Jieqi Shi,
Jing Huo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Dian and Xu, Zhengzheng and Wang, Peiyang and Liu, Like and Wang, Yule and Shi, Jieqi and Huo, Jing},
  title     = {{FineCog-Nav}: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal {UAV} Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1325--1334}
}

Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks
Yu Yan,
Sheng Sun,
Shengjia Cheng,
Teli Liu,
Mingfeng Li,
Min Liu
[pdf] [arXiv]
[bibtex]
% Key disambiguated: Yan_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Yan_2026_CVPR_RedTeaming,
  author    = {Yan, Yu and Sun, Sheng and Cheng, Shengjia and Liu, Teli and Li, Mingfeng and Liu, Min},
  title     = {Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {837--846}
}

Plug-and-Think: Structured Reasoning for Vision-Language-Action Models
Kaikai Wei,
Di Wen,
Xinhai Li,
Senwei Xiang
[pdf] [supp]
[bibtex]
% NOTE(review): second author's surname capitalised ("wen" -> "Wen"); confirm against the paper.
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Kaikai and Wen, Di and Li, Xinhai and Xiang, Senwei},
  title     = {{Plug-and-Think}: Structured Reasoning for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3136--3145}
}

Step-CoT: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering
Lin Fan,
Yafei Ou,
Zhipeng Deng,
Pengyu Dai,
Chongxian Hou,
Jiale Yan,
Yaqian Li,
Kaiwen Long,
Xun Gong,
Masayuki Ikebe,
Yefeng Zheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Lin and Ou, Yafei and Deng, Zhipeng and Dai, Pengyu and Hou, Chongxian and Yan, Jiale and Li, Yaqian and Long, Kaiwen and Gong, Xun and Ikebe, Masayuki and Zheng, Yefeng},
  title     = {{Step-CoT}: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2049--2059}
}

Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings
Fatemeh Akbarian,
Anahita Baninajjar,
Yingyi Zhang,
Ananth Balashankar,
Amir Aminifar
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akbarian_2026_CVPR,
  author    = {Akbarian, Fatemeh and Baninajjar, Anahita and Zhang, Yingyi and Balashankar, Ananth and Aminifar, Amir},
  title     = {Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {748--757}
}

LTGS: Long-Term Gaussian Scene Chronology From Sparse View Updates
Minkwan Kim,
Seungmin Lee,
Junho Kim,
Young Min Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minkwan and Lee, Seungmin and Kim, Junho and Kim, Young Min},
  title     = {{LTGS}: Long-Term {Gaussian} Scene Chronology From Sparse View Updates},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {488--497}
}

VideoMatGen: PBR Materials through Joint Generative Modeling
Jon Hasselgren,
Milos Hasan,
Zheng Zeng,
Jacob Munkberg
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasselgren_2026_CVPR,
  author    = {Hasselgren, Jon and Hasan, Milos and Zeng, Zheng and Munkberg, Jacob},
  title     = {{VideoMatGen}: {PBR} Materials through Joint Generative Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2440--2450}
}

Environmental Understanding Vision-language Model for Embodied Agent
Jinsik Bang,
Jaeyeon Bae,
Donggyu Lee,
Siyeol Jung,
Taehwan Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2026_CVPR,
  author    = {Bang, Jinsik and Bae, Jaeyeon and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
  title     = {Environmental Understanding Vision-language Model for Embodied Agent},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3092--3102}
}

Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints
Jungkon Kim,
Cheolseung Jung,
Jong-Min Choi,
Juseong Lee
[pdf] [supp]
[bibtex]
% Key disambiguated: Kim_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Kim_2026_CVPR_Phantom,
  author    = {Kim, Jungkon and Jung, Cheolseung and Choi, Jong-Min and Lee, Juseong},
  title     = {Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {847--856}
}

Learning Vision-Language-Action World Models for Autonomous Driving
Guoqing Wang,
Pin Tang,
Xiangxuan Ren,
Guodongfang Zhao,
Bailan Feng,
Chao Ma
[pdf] [supp] [arXiv]
[bibtex]
% Key disambiguated: Wang_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Wang_2026_CVPR_VLAWorldModels,
  author    = {Wang, Guoqing and Tang, Pin and Ren, Xiangxuan and Zhao, Guodongfang and Feng, Bailan and Ma, Chao},
  title     = {Learning Vision-Language-Action World Models for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1073--1084}
}

Exploring the best way for UAV visual localization under Low-altitude Multi-view Observation Condition: a Benchmark
Yibin Ye,
Xichao Teng,
Shuo Chen,
Leqi Liu,
Kun Wang,
Xiaokai Song,
Zhang Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yibin and Teng, Xichao and Chen, Shuo and Liu, Leqi and Wang, Kun and Song, Xiaokai and Li, Zhang},
  title     = {Exploring the best way for {UAV} visual localization under Low-altitude Multi-view Observation Condition: a Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1731--1741}
}

BMD-45: A Large-Scale CCTV Vehicle Detection Dataset for Urban Traffic in Developing Cities
Akash Sharma,
Chinmay Mhatre,
Sankalp Gawali,
Ruthvik Bokkasam,
Brij Sharma,
Vishwajeet Pattanaik,
Punit Rathore,
Raghu Krishnapuram,
Vijay Gopal Kovvali,
Anirban Chakraborty,
Yogesh Simmhan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Akash and Mhatre, Chinmay and Gawali, Sankalp and Bokkasam, Ruthvik and Sharma, Brij and Pattanaik, Vishwajeet and Rathore, Punit and Krishnapuram, Raghu and Kovvali, Vijay Gopal and Chakraborty, Anirban and Simmhan, Yogesh},
  title     = {{BMD-45}: A Large-Scale {CCTV} Vehicle Detection Dataset for Urban Traffic in Developing Cities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2240--2249}
}

Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention
Wenhu Zhang,
Yiming Wu,
Huanyu Wang,
YaoYang Liu,
Huanzhang Dou,
Senqiao Yang,
Sitong Wu,
Hanbin Zhao,
Jiaya Jia
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenhu and Wu, Yiming and Wang, Huanyu and Liu, YaoYang and Dou, Huanzhang and Yang, Senqiao and Wu, Sitong and Zhao, Hanbin and Jia, Jiaya},
  title     = {Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2852--2862}
}

Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training
Yanyun Wang,
Qingqing Ye,
Li Liu,
Zi Liang,
Haibo Hu
[pdf] [supp] [arXiv]
[bibtex]
% Key disambiguated: Wang_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Wang_2026_CVPR_RobustAlignment,
  author    = {Wang, Yanyun and Ye, Qingqing and Liu, Li and Liang, Zi and Hu, Haibo},
  title     = {Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {768--778}
}

Memorization in 3D Shape Generation: An Empirical Study
Shu Pu,
Boya Zeng,
Kaichen Zhou,
Mengyu Wang,
Zhuang Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2026_CVPR,
  author    = {Pu, Shu and Zeng, Boya and Zhou, Kaichen and Wang, Mengyu and Liu, Zhuang},
  title     = {Memorization in {3D} Shape Generation: An Empirical Study},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1828--1838}
}

Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings
Luca Parolari,
Nicla Faccioli,
Lamberto Ballan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parolari_2026_CVPR,
  author    = {Parolari, Luca and Faccioli, Nicla and Ballan, Lamberto},
  title     = {Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1742--1751}
}

DaMN: Deleting and Migrating Normalization Layers from Transformers
Alexey Ryabykin,
Irina Zhelavskaya,
Egor Shvetsov,
Alexey Rukhovich,
Nikita Okhotnikov,
Artem Khrapov,
Evgeny Burnaev,
Vladimir Mikhailovich Kryzhanovskiy
[pdf] [supp]
[bibtex]
@InProceedings{Ryabykin_2026_CVPR,
  author    = {Ryabykin, Alexey and Zhelavskaya, Irina and Shvetsov, Egor and Rukhovich, Alexey and Okhotnikov, Nikita and Khrapov, Artem and Burnaev, Evgeny and Kryzhanovskiy, Vladimir Mikhailovich},
  title     = {{DaMN}: Deleting and Migrating Normalization Layers from Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2883--2892}
}

Do LLMs and VLMs Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer
Chenhang Cui,
An Zhang,
Yuxin Chen,
Gelei Deng,
Jingnan Zheng,
Zhenkai Liang,
Xiang Wang,
Tat-Seng Chua
[pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Chenhang and Zhang, An and Chen, Yuxin and Deng, Gelei and Zheng, Jingnan and Liang, Zhenkai and Wang, Xiang and Chua, Tat-Seng},
  title     = {Do {LLMs} and {VLMs} Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2346--2356}
}

Re-Depth Anything: Test-Time Depth Refinement via Self-Supervised Re-lighting
Ananta R. Bhattarai,
Helge Rhodin
[pdf] [supp]
[bibtex]
@InProceedings{Bhattarai_2026_CVPR,
  author    = {Bhattarai, Ananta R. and Rhodin, Helge},
  title     = {{Re-Depth} {Anything}: Test-Time Depth Refinement via Self-Supervised Re-lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {602--612}
}

TransKV: A Data-Driven Pruning Method for Large Foundation Models
Guangning Xu,
Fanxu Meng,
Ruijie Zhou,
Michael K Ng,
Wenjie Pei,
Muhan Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangning and Meng, Fanxu and Zhou, Ruijie and Ng, Michael K and Pei, Wenjie and Zhang, Muhan},
  title     = {{TransKV}: A Data-Driven Pruning Method for Large Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2451--2461}
}

MaMe: Matrix-Based Token Merging
Simin Huo,
Ning Li
[pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Simin and Li, Ning},
  title     = {{MaMe}: Matrix-Based Token Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2863--2872}
}

BadRSSD: Backdoor Attacks on Regularized Self-Supervised Diffusion Models
Jiayao Wang,
Yiping Zhang,
Mohammad Maruf Hasan,
Xiaoying Lei,
Jiale Zhang,
Junwu Zhu,
Qilin Wu,
Dongfang Zhao
[pdf] [supp] [arXiv]
[bibtex]
% Key disambiguated: Wang_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Wang_2026_CVPR_BadRSSD,
  author    = {Wang, Jiayao and Zhang, Yiping and Hasan, Mohammad Maruf and Lei, Xiaoying and Zhang, Jiale and Zhu, Junwu and Wu, Qilin and Zhao, Dongfang},
  title     = {{BadRSSD}: Backdoor Attacks on Regularized Self-Supervised Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {705--715}
}

Splatwizard: A Benchmark Toolkit for 3D Gaussian Splatting Compression
Xiang Liu,
Yimin Zhou,
Jinxiang Wang,
Yujun Huang,
Shuzhao Xie,
Shiyu Qin,
Mingyao Hong,
Jiawei Li,
Yaowei Wang,
Zhi Wang,
Shu-Tao Xia,
Bin Chen
[pdf] [supp] [arXiv]
[bibtex]
% Key disambiguated: Liu_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Liu_2026_CVPR_Splatwizard,
  author    = {Liu, Xiang and Zhou, Yimin and Wang, Jinxiang and Huang, Yujun and Xie, Shuzhao and Qin, Shiyu and Hong, Mingyao and Li, Jiawei and Wang, Yaowei and Wang, Zhi and Xia, Shu-Tao and Chen, Bin},
  title     = {Splatwizard: A Benchmark Toolkit for {3D} {Gaussian} Splatting Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2261--2271}
}

See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models
Le Thien Phuc Nguyen,
Zhuoran Yu,
Samuel Low Yu Hang,
Subin An,
Jeongik Lee,
Yohan Ban,
SeungEun Chung,
Thanh-Huy Nguyen,
JuWan Maeng,
Soochahn Lee,
Yong Jae Lee
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Le Thien Phuc and Yu, Zhuoran and Hang, Samuel Low Yu and An, Subin and Lee, Jeongik and Ban, Yohan and Chung, SeungEun and Nguyen, Thanh-Huy and Maeng, JuWan and Lee, Soochahn and Lee, Yong Jae},
  title     = {See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2272--2283}
}

RACE-6D: Real-time Accurate Coarse-to-finE Object 6D Pose Transformer
Yoonwoo Ha,
Hyungpil Moon
[pdf] [supp]
[bibtex]
@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Yoonwoo and Moon, Hyungpil},
  title     = {{RACE-6D}: Real-time Accurate {Coarse-to-finE} Object {6D} Pose Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1525--1534}
}

GM-Skip: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models
Lianming Huang,
Haibo Hu,
Qiao Li,
Xin He,
Nan Guan,
Chun Jason Xue
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Lianming and Hu, Haibo and Li, Qiao and He, Xin and Guan, Nan and Xue, Chun Jason},
  title     = {{GM-Skip}: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2834--2843}
}

Res2SPDNet: Multi-Granularity SPD Matrix Residual Learning for Signal Classification
Shenghui Yue,
Rui Wang,
Tianyang Xu,
Tao Zhou,
Xiao-Jun Wu,
Josef Kittler
[pdf]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Shenghui and Wang, Rui and Xu, Tianyang and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef},
  title     = {{Res2SPDNet}: Multi-Granularity {SPD} Matrix Residual Learning for Signal Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2639--2648}
}

What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters
Shaobo Liu,
Haobo Xiong,
Kai Liu,
Yuna Lin
[pdf] [supp] [arXiv]
[bibtex]
% Key disambiguated: Liu_2026_CVPR is already used by an earlier entry in this file.
@InProceedings{Liu_2026_CVPR_WhatAndWhere,
  author    = {Liu, Shaobo and Xiong, Haobo and Liu, Kai and Lin, Yuna},
  title     = {What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2813--2822}
}

RoboScape-R: Unified Reward-Observation World Models for Generalizable Robotics Training via RL
Yinzhou Tang,
Yu Shang,
Yinuo Chen,
Bingwen Wei,
Xin Zhang,
Shu'ang Yu,
Liangzhi Shi,
Chao Yu,
Chen Gao,
Wei Wu,
Yong Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yinzhou and Shang, Yu and Chen, Yinuo and Wei, Bingwen and Zhang, Xin and Yu, Shu'ang and Shi, Liangzhi and Yu, Chao and Gao, Chen and Wu, Wei and Li, Yong},
  title     = {{RoboScape-R}: Unified Reward-Observation World Models for Generalizable Robotics Training via {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1472--1481},
}

IRL-VLA: Vision-Language-Action Training via Reward World Model
Anqing Jiang,
Gao Yu,
Heng Yuwen,
Yiru Wang,
Wang Shuo,
Jiang Hao,
Sun Hao
[pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Anqing and Yu, Gao and Yuwen, Heng and Wang, Yiru and Shuo, Wang and Hao, Jiang and Hao, Sun},
  title     = {{IRL-VLA}: Vision-Language-Action Training via Reward World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {970--979},
}

Softmax-GS: Generalized Gaussians Learning When to Blend or Bound
Chen Ziwen,
Peng Wang,
Hao Tan,
Zexiang Xu,
Li Fuxin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Wang, Peng and Tan, Hao and Xu, Zexiang and Fuxin, Li},
  title     = {{Softmax-GS}: Generalized {Gaussians} Learning When to Blend or Bound},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {508--517},
}

On the Feasibility and Opportunity of Autoregressive 3D Object Detection
Zanming Huang,
Jinsu Yoo,
Sooyoung Jeon,
Zhenzhen Liu,
Mark Campbell,
Kilian Q Weinberger,
Bharath Hariharan,
Wei-Lun Chao,
Katie Z Luo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zanming and Yoo, Jinsu and Jeon, Sooyoung and Liu, Zhenzhen and Campbell, Mark and Weinberger, Kilian Q and Hariharan, Bharath and Chao, Wei-Lun and Luo, Katie Z},
  title     = {On the Feasibility and Opportunity of Autoregressive {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1170--1179},
}

LUMINA: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction
Jiazhou Lin,
Zhongyi Liu,
Ying Shi,
Zhichun Zhao,
Zhuoyu Wang,
Yuhang Zhou,
Huanling Hu,
Guangnan Ye,
Mengtian Li,
Lei Guo
[pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiazhou and Liu, Zhongyi and Shi, Ying and Zhao, Zhichun and Wang, Zhuoyu and Zhou, Yuhang and Hu, Huanling and Ye, Guangnan and Li, Mengtian and Guo, Lei},
  title     = {{LUMINA}: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1629--1639},
}

A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing
Maria Stoica,
Abdelrahman Hekal,
Alessio Lomuscio
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stoica_2026_CVPR,
  author    = {Stoica, Maria and Hekal, Abdelrahman and Lomuscio, Alessio},
  title     = {A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {685--694},
}

MFI-ResNet: Efficient ResNet Architecture Optimization via MeanFlow Compression and Selective Incubation
Nuolin Sun,
Linyuan Wang,
Haonan Wei,
Lei Li,
Bin Yan
[pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Nuolin and Wang, Linyuan and Wei, Haonan and Li, Lei and Yan, Bin},
  title     = {{MFI-ResNet}: Efficient {ResNet} Architecture Optimization via {MeanFlow} Compression and Selective Incubation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2691--2699},
}

Speed3R: Sparse Feed-forward 3D Reconstruction Models
Weining Ren,
Xiao Tan,
Kai Han
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Weining and Tan, Xiao and Han, Kai},
  title     = {{Speed3R}: Sparse Feed-forward {3D} Reconstruction Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {119--128},
}

Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization PatchGAN
C. Zaghetto,
A. Purim,
W. Oliveira,
J. R. Ribeiro,
H. Nolla,
F. Santos,
M. Chang,
R. H. Vareto
[pdf] [supp]
[bibtex]
@InProceedings{Zaghetto_2026_CVPR,
  author    = {Zaghetto, C. and Purim, A. and Oliveira, W. and Ribeiro, J. R. and Nolla, H. and Santos, F. and Chang, M. and Vareto, R. H.},
  title     = {Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization {PatchGAN}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1221--1229},
}

Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection
Hanvitha Saraswathi Mukkamala,
Arun K Pujari
[pdf] [supp]
[bibtex]
@InProceedings{Mukkamala_2026_CVPR,
  author    = {Mukkamala, Hanvitha Saraswathi and Pujari, Arun K},
  title     = {Co-Adaptive Graph Learning Through Coupled Spectral Refinement for {3D} Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1680--1689},
}

GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction
Minsol Kim,
Usman Ali
[pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minsol and Ali, Usman},
  title     = {{GauSDF}: Signed Distance Embedded {Gaussian} Surfels for {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {33--42},
}

AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents
Yue Cao,
Yingyao Wang,
Pi Bu,
Jingxuan Xing,
Wei Jiang,
Zekun Zhu,
Junpeng Ma,
Sashuai Zhou,
Tong Lu,
Jun Song,
Yu Cheng,
Yuning Jiang,
Bo Zheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yue and Wang, Yingyao and Bu, Pi and Xing, Jingxuan and Jiang, Wei and Zhu, Zekun and Ma, Junpeng and Zhou, Sashuai and Lu, Tong and Song, Jun and Cheng, Yu and Jiang, Yuning and Zheng, Bo},
  title     = {{AndroidLens}: Long-latency Evaluation with Nested Sub-targets for {Android} {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1903--1912},
}

C^2T: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination
Yuyang Chen,
Kaiyan Zhao,
Yiming Wang,
Ming Yang,
Bin Rao,
Zhenning Li
[pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuyang and Zhao, Kaiyan and Wang, Yiming and Yang, Ming and Rao, Bin and Li, Zhenning},
  title     = {{C{\textasciicircum}2T}: Captioning-Structure and {LLM-Aligned} Common-Sense Reward Learning for Traffic-Vehicle Coordination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1128--1137},
}

Generative Event Pretraining with Foundation Model Alignment
Jianwen Cao,
Jiaxu Xing,
Nico Messikommer,
Davide Scaramuzza
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jianwen and Xing, Jiaxu and Messikommer, Nico and Scaramuzza, Davide},
  title     = {Generative Event Pretraining with Foundation Model Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3189--3199},
}

Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models
Gracjan Goral,
Alicja Ziarko,
Piotr Milos,
Michal Nauman,
Maciej Wolczyk,
Michal Kosinski
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goral_2026_CVPR,
  author    = {Goral, Gracjan and Ziarko, Alicja and Milos, Piotr and Nauman, Michal and Wolczyk, Maciej and Kosinski, Michal},
  title     = {Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1721--1730},
}

BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching
Zhien Dai,
Zhaohui Tang,
Hu Zhang,
Mingjun Pan,
Jin Luo,
Yongfang Xie
[pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Zhien and Tang, Zhaohui and Zhang, Hu and Pan, Mingjun and Luo, Jin and Xie, Yongfang},
  title     = {{BLMT-Stereo}: Breaking the Local Minima Trap of Iterative Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1--11},
}

FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting
Tianhao Xie,
Linlian Jiang,
Xinxin Zuo,
Yang Wang,
Tiberiu Popa
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tianhao and Jiang, Linlian and Zuo, Xinxin and Wang, Yang and Popa, Tiberiu},
  title     = {{FACT-GS}: Frequency-Aligned Complexity-Aware Texture Reparameterization for {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {202--212},
}

Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification
Lulu Fang,
Jiaxiang Qin,
Ruiheng Yan,
Ning Pan,
Haihua Liu,
Xinxin Chen
[pdf]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Lulu and Qin, Jiaxiang and Yan, Ruiheng and Pan, Ning and Liu, Haihua and Chen, Xinxin},
  title     = {Deep Feedback {ConvNets} by Embedding the Working Memory Module for Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2564--2574},
}

HumanOrbit: 3D Human Reconstruction as 360° Orbit Generation
Keito Suzuki,
Kunyao Chen,
Lei Wang,
Bang Du,
Runfa Blark Li,
Peng Liu,
Ning Bi,
Truong Nguyen
[pdf] [supp]
[bibtex]
@InProceedings{Suzuki_2026_CVPR,
  author    = {Suzuki, Keito and Chen, Kunyao and Wang, Lei and Du, Bang and Li, Runfa Blark and Liu, Peng and Bi, Ning and Nguyen, Truong},
  title     = {{HumanOrbit}: {3D} Human Reconstruction as {360$^\circ$} Orbit Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {624--634},
}

Channel Correlation Loss for Binary Neural Networks
Xindi Zuo,
Wei Zhang,
Hai Yu,
Zhiliang Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Xindi and Zhang, Wei and Yu, Hai and Zhu, Zhiliang},
  title     = {Channel Correlation Loss for Binary Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2575--2584},
}

JACoP: Joint Alignment for Compliant Multi-Agent Prediction
Qingze Tony Liu,
Alen Mrdovic,
Danrui Li,
Mathew Schwartz,
Sejong Yoon,
Mubbasir Kapadia
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Qingze Tony and Mrdovic, Alen and Li, Danrui and Schwartz, Mathew and Yoon, Sejong and Kapadia, Mubbasir},
  title     = {{JACoP}: Joint Alignment for Compliant Multi-Agent Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {910--919},
}

Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models
Songlong Xing,
Weijie Wang,
Zhengyu Zhao,
Jindong Gu,
Philip Torr,
Nicu Sebe
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Songlong and Wang, Weijie and Zhao, Zhengyu and Gu, Jindong and Torr, Philip and Sebe, Nicu},
  title     = {Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {737--747},
}

Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training
Hu Lin,
Chengjiang Long,
Jiqing Zhang,
Chuanlu Jiang,
Huilin Ge,
Erwei Yin,
Baocai Yin,
Xin Yang
[pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Hu and Long, Chengjiang and Zhang, Jiqing and Jiang, Chuanlu and Ge, Huilin and Yin, Erwei and Yin, Baocai and Yang, Xin},
  title     = {Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1335--1345},
}

Revisiting Articulated Parts Perception in Robot Manipulation
Xiaoqian Wu,
Yejie Guo,
Xiaoyang Chen,
Lixin Yang,
Cewu Lu,
Yong-Lu Li
[pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaoqian and Guo, Yejie and Chen, Xiaoyang and Yang, Lixin and Lu, Cewu and Li, Yong-Lu},
  title     = {Revisiting Articulated Parts Perception in Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1368--1377},
}

Vision Language Models are Confused Tourists
Patrick Amadeus Irawan,
Ikhlasul Akmal Hanif,
Muhammad Dehan Al Kautsar,
Genta Indra Winata,
Fajri Koto,
Alham Fikri Aji
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Irawan_2026_CVPR,
  author    = {Irawan, Patrick Amadeus and Hanif, Ikhlasul Akmal and Al Kautsar, Muhammad Dehan and Winata, Genta Indra and Koto, Fajri and Aji, Alham Fikri},
  title     = {Vision Language Models are Confused Tourists},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1763--1773},
}

Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs
Tianle Chen,
Chaitanya Chakka,
Arjun Reddy Akula,
Xavier Thomas,
Deepti Ghadiyaram
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tianle and Chakka, Chaitanya and Akula, Arjun Reddy and Thomas, Xavier and Ghadiyaram, Deepti},
  title     = {Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2142--2151},
}

Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling
Camille Biscarrat,
Michaël Gharbi,
Rahul Goel,
Jonathan Ragan-Kelley,
Frédo Durand,
Tzu-Mao Li
[pdf] [supp]
[bibtex]
@InProceedings{Biscarrat_2026_CVPR,
  author    = {Biscarrat, Camille and Gharbi, Micha{\"e}l and Goel, Rahul and Ragan-Kelley, Jonathan and Durand, Fr{\'e}do and Li, Tzu-Mao},
  title     = {Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1283--1293},
}

From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity
Haoming Liu,
Jinnuo Liu,
Yanhao Li,
Liuyang Bai,
Yunkai Ji,
Yuanhe Guo,
Shenji Wan,
Hongyi Wen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haoming and Liu, Jinnuo and Li, Yanhao and Bai, Liuyang and Ji, Yunkai and Guo, Yuanhe and Wan, Shenji and Wen, Hongyi},
  title     = {From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2649--2658},
}

HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet
Badri N Patro,
Vijay S Agneeswaran
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patro_2026_CVPR,
  author    = {Patro, Badri N and Agneeswaran, Vijay S},
  title     = {{HAMSA}: Scanning-Free Vision State Space Models via {SpectralPulseNet}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2408--2418},
}

Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering
Xiaobin Deng,
Changyu Diao,
Min Li,
Ruohan Yu,
Duanqing Xu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiaobin and Diao, Changyu and Li, Min and Yu, Ruohan and Xu, Duanqing},
  title     = {Improving Densification in {3D} {Gaussian} Splatting for High-Fidelity Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {223--232},
}

Rich Feature Learning via Diversification
Xi Leng,
Yongqiang Chen,
Xiaoying Tang,
Yatao Bian
[pdf] [supp]
[bibtex]
@InProceedings{Leng_2026_CVPR,
  author    = {Leng, Xi and Chen, Yongqiang and Tang, Xiaoying and Bian, Yatao},
  title     = {Rich Feature Learning via Diversification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2462--2472},
}

SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting
Yonghan Lee,
Tsung-Wei Huang,
Shiv Gehlot,
Jaehoon Choi,
Guan-Ming Su,
Dinesh Manocha
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yonghan and Huang, Tsung-Wei and Gehlot, Shiv and Choi, Jaehoon and Su, Guan-Ming and Manocha, Dinesh},
  title     = {{SyncTrack4D}: Cross-Video Motion Alignment and Video Synchronization with Multi-Video {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {77--87},
}

When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization
Md Zarif Hossain,
Awal Ahmed Fime,
Ahmed Imteaj
[pdf] [supp]
[bibtex]
@InProceedings{Hossain_2026_CVPR,
  author    = {Hossain, Md Zarif and Fime, Awal Ahmed and Imteaj, Ahmed},
  title     = {When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {758--767},
}

Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data
Yizhao Xu,
Hongyuan Zhu,
Caiyun Liu,
Tianfu Wang,
Keyu Chen,
Sicheng Xu,
Jiaolong Yang,
Nicholas Jing Yuan,
Qi Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yizhao and Zhu, Hongyuan and Liu, Caiyun and Wang, Tianfu and Chen, Keyu and Xu, Sicheng and Yang, Jiaolong and Yuan, Nicholas Jing and Zhang, Qi},
  title     = {Beyond Voxel {3D} Editing: Learning from {3D} Masks and Self-Constructed Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {635--646},
}

BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models
Ba Luan Dang,
Vu Tuan Truong,
Long Bao Le
[pdf] [supp]
[bibtex]
@InProceedings{Dang_2026_CVPR,
  author    = {Dang, Ba Luan and Truong, Vu Tuan and Le, Long Bao},
  title     = {{BadVLM}: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {726--736},
}

PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer
David Picard,
Nicolas Dufour,
Lucas Degeorge,
Arijit Ghosh,
Davide Allegro,
Tom Ravaud,
Yohann Perron,
Corentin Sautier,
Zeynep Sonat Baltaci,
Fei Meng,
Syrine Kalleli,
Marta López-Rauhut,
Thibaut Loiseau,
Ségolène Albouy,
Raphael Baena,
Elliot Vincent,
Loic Landrieu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Picard_2026_CVPR,
  author    = {Picard, David and Dufour, Nicolas and Degeorge, Lucas and Ghosh, Arijit and Allegro, Davide and Ravaud, Tom and Perron, Yohann and Sautier, Corentin and Baltaci, Zeynep Sonat and Meng, Fei and Kalleli, Syrine and L{\'o}pez-Rauhut, Marta and Loiseau, Thibaut and Albouy, S{\'e}gol{\`e}ne and Baena, Raphael and Vincent, Elliot and Landrieu, Loic},
  title     = {{PoM}: A Linear-Time Replacement for Attention with the Polynomial Mixer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2544--2553},
}

D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models
Wenlun Zhang,
Yunshan Zhong,
Zihao Ding,
Xinyu Li,
Kentaro Yoshioka
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenlun and Zhong, Yunshan and Ding, Zihao and Li, Xinyu and Yoshioka, Kentaro},
  title     = {{D4C}: Data-Free Quantization for Contrastive Language-Image Pre-Training Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2978--2987},
}

CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios
Hangyu Li,
Bofeng Cao,
Zhaohui Liang,
Wuzhen Li,
Juyoung Oh,
Yuxuan Chen,
Shixiao Liang,
Hang Zhou,
Chengyuan Ma,
Jiaxi Liu,
Zheng Li,
Peng Zhang,
Keke Long,
Maolin Liu,
Jackson Jiang,
Chunlei Yu,
Shengxiang Liu,
Hongkai Yu,
Xiaopeng Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hangyu and Cao, Bofeng and Liang, Zhaohui and Li, Wuzhen and Oh, Juyoung and Chen, Yuxuan and Liang, Shixiao and Zhou, Hang and Ma, Chengyuan and Liu, Jiaxi and Li, Zheng and Zhang, Peng and Long, Keke and Liu, Maolin and Jiang, Jackson and Yu, Chunlei and Liu, Shengxiang and Yu, Hongkai and Li, Xiaopeng},
  title     = {{CATS-V2V}: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2294--2303},
}

RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer
Liu Liu,
Xiaofeng Wang,
Guosheng Zhao,
Keyu Li,
Wenkang Qin,
Jiagang Zhu,
Jiaxiong Qiu,
Guan Huang,
Zhizhong Su
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Liu and Wang, Xiaofeng and Zhao, Guosheng and Li, Keyu and Qin, Wenkang and Zhu, Jiagang and Qiu, Jiaxiong and Huang, Guan and Su, Zhizhong},
  title     = {{RoboTransfer}: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1410--1420},
}

SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360° Video Saliency Prediction
Kao Zhang,
Tao Song,
Zhihua Hu,
Ming Li,
Xin Ding
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kao and Song, Tao and Hu, Zhihua and Li, Ming and Ding, Xin},
  title     = {{SGST-Transformer}: A Spherical Geometry-Aware Spatio-Temporal Transformer for {360$^\circ$} Video Saliency Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2596--2605},
}

VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning
Bo Pang,
Chenxi Xu,
Jierui Ren,
Guoping Wang,
Sheng Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2026_CVPR,
  author    = {Pang, Bo and Xu, Chenxi and Ren, Jierui and Wang, Guoping and Li, Sheng},
  title     = {{VibraVerse}: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2028--2037},
}

RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies
Guillermo Garcia-Cobo,
Maximilian Igl,
Peter Karkus,
Zhejun Zhang,
Michael Watson,
Yuxiao Chen,
Boris Ivanovic,
Marco Pavone
[pdf] [supp]
[bibtex]
@InProceedings{Garcia-Cobo_2026_CVPR,
  author    = {Garcia-Cobo, Guillermo and Igl, Maximilian and Karkus, Peter and Zhang, Zhejun and Watson, Michael and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco},
  title     = {{RoaD}: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1000--1009},
}

Self-Evolving 3D Scene Generation from a Single Image
Kaizhi Zheng,
Yue Fan,
Jing Gu,
Zishuo Xu,
Xuehai He,
Xin Eric Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Kaizhi and Fan, Yue and Gu, Jing and Xu, Zishuo and He, Xuehai and Wang, Xin Eric},
  title     = {Self-Evolving {3D} Scene Generation from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {579--590},
}

SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting
Jikai Wang,
Xingtai Gui,
Jiahao Gong,
Feiyang Tan,
Wencheng Han,
Cheng-Zhong Xu,
Jianbing Shen
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jikai and Gui, Xingtai and Gong, Jiahao and Tan, Feiyang and Han, Wencheng and Xu, Cheng-Zhong and Shen, Jianbing},
  title     = {{SurfelOcc}: Self-supervised Occupancy Prediction via {2D} Surfel Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1039--1049},
}

Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness
Debarshi Brahma,
Soma Biswas
[pdf] [supp]
[bibtex]
@InProceedings{Brahma_2026_CVPR,
  author    = {Brahma, Debarshi and Biswas, Soma},
  title     = {Defending {CLIP} via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {656--665},
}

RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection
Ozsel Kilinc,
Cem Tarhan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kilinc_2026_CVPR,
  author    = {Kilinc, Ozsel and Tarhan, Cem},
  title     = {{RQR3D}: Reparametrizing the regression targets for {BEV-based} {3D} object detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1159--1169},
}

MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing
Changho Choi,
Minho Kim,
Jinkyu Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Changho and Kim, Minho and Kim, Jinkyu},
  title     = {{MambaEye}: A Size-Agnostic Visual Encoder with Causal Sequential Processing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2659--2668},
}

RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction
Yangfan Zhao,
Hanwei Zhang,
Ke Huang,
Qiufeng Wang,
Zhenzhou Shao,
Dengyu Wu
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yangfan and Zhang, Hanwei and Huang, Ke and Wang, Qiufeng and Shao, Zhenzhou and Wu, Dengyu},
  title     = {{RU4D-SLAM}: Reweighting Uncertainty in {Gaussian} Splatting {SLAM} for {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1492--1502},
}

PolyReal: A Benchmark for Real-World Polymer Science Workflows
Wanhao Liu,
Weida Wang,
Jiaqing Xie,
Suorong Yang,
Jue Wang,
Benteng Chen,
Guangtao Mei,
Zonglin Yang,
Shufei Zhang,
Yuchun Mo,
Lang Cheng,
Jin Zeng,
Houqiang Li,
Wanli Ouyang,
Yuqiang Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Wanhao and Wang, Weida and Xie, Jiaqing and Yang, Suorong and Wang, Jue and Chen, Benteng and Mei, Guangtao and Yang, Zonglin and Zhang, Shufei and Mo, Yuchun and Cheng, Lang and Zeng, Jin and Li, Houqiang and Ouyang, Wanli and Li, Yuqiang},
  title     = {{PolyReal}: A Benchmark for Real-World Polymer Science Workflows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1954--1964},
}

Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization
Linsi Wu,
Gang Shen,
Xuefei Lv,
Chenglong Wu,
Yuru Pei
[pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Linsi and Shen, Gang and Lv, Xuefei and Wu, Chenglong and Pei, Yuru},
  title     = {Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2669--2679},
}

On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks
Ziad Tariq Muhammad Ali,
Raja Muhammad Atif Azad,
Muhammad Ajmal Azad,
Iain Rice,
Umar Daraz,
Ali Shariq Imran,
James Holyhead
[pdf] [supp]
[bibtex]
@InProceedings{Ali_2026_CVPR,
  author    = {Ali, Ziad Tariq Muhammad and Azad, Raja Muhammad Atif and Azad, Muhammad Ajmal and Rice, Iain and Daraz, Umar and Imran, Ali Shariq and Holyhead, James},
  title     = {On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {809--818},
}

HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints
Shurui Liu,
Weide Chen,
Ancong Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shurui and Chen, Weide and Wu, Ancong},
  title     = {{HiDiGen}: Hierarchical Diffusion for {B-Rep} Generation with Explicit Topological Constraints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {538--546},
}

WildAni4D: Towards 4D Animal Mesh Reconstruction
Gyeongsu Cho,
Hezhen Hu,
Donghyeon Soon,
Changwoo Kang,
Kyungdon Joo
[pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Gyeongsu and Hu, Hezhen and Soon, Donghyeon and Kang, Changwoo and Joo, Kyungdon},
  title     = {{WildAni4D}: Towards {4D} Animal Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {160--169},
}

PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET
Yixin Chen,
Yan Wang,
Wenrui Shao,
Zhaoheng Xie
[pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yixin and Wang, Yan and Shao, Wenrui and Xie, Zhaoheng}, title = {{PHATE-Net}: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in {PET}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2534--2543} }

LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting
Yicheng Rui,
Xiao-Wei Duan,
Licai Deng,
Fan Yang,
Zhengming Dang,
Zhengjun Du,
Junhao Peng,
Wenhao Chu,
Umut Mahmut,
Kexin Li,
Yiyun Wu,
Fabo Feng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rui_2026_CVPR, author = {Rui, Yicheng and Duan, Xiao-Wei and Deng, Licai and Yang, Fan and Dang, Zhengming and Du, Zhengjun and Peng, Junhao and Chu, Wenhao and Mahmut, Umut and Li, Kexin and Wu, Yiyun and Feng, Fabo}, title = {{LenghuSky-8}: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and {Alt-Az} Calibration for Segmentation and Nowcasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1774--1785} }

Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation
Junyu Chen,
Md Yousuf Harun,
Christopher Kanan
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Junyu and Harun, Md Yousuf and Kanan, Christopher}, title = {Unlocking {ImageNet's} Multi-Object Nature: Automated Large-Scale Multilabel Annotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2284--2293} }

RefDrone: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes
Zhichao Sun,
Yepeng Liu,
Zhiling Su,
Huachao Zhu,
Yuliang Gu,
Yuda Zou,
Zelong Liu,
Gui-Song Xia,
Bo Du,
Yongchao Xu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Zhichao and Liu, Yepeng and Su, Zhiling and Zhu, Huachao and Gu, Yuliang and Zou, Yuda and Liu, Zelong and Xia, Gui-Song and Du, Bo and Xu, Yongchao}, title = {{RefDrone}: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1752--1762} }

ProDiG: Progressive Diffusion-Guided Gaussian Splatting for Aerial to Ground Reconstruction
Sirshapan Mitra,
Yogesh S Rawat
[pdf] [arXiv]
[bibtex]
@InProceedings{Mitra_2026_CVPR, author = {Mitra, Sirshapan and Rawat, Yogesh S}, title = {{ProDiG}: Progressive Diffusion-Guided {Gaussian} Splatting for Aerial to Ground Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {22--32} }

Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models
Feng Chen,
Chenhui Gou,
Yefei He,
Yang Yang,
Bohan Zhuang,
Qi Wu
[pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Feng and Gou, Chenhui and He, Yefei and Yang, Yang and Zhuang, Bohan and Wu, Qi}, title = {Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3050--3059} }

Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks
Zhiqiu Xia,
Furong Mu,
Qi Li,
Shanshan Zhang,
Jie Gui,
Chunpeng Wang,
Yunan Liu
[pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Zhiqiu and Mu, Furong and Li, Qi and Zhang, Shanshan and Gui, Jie and Wang, Chunpeng and Liu, Yunan}, title = {Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1650--1659} }

A Simple Framework for Visual Navigation
Faith Johnson,
Bryan Bo Cao,
Shubham Jain,
Ashwin Ashok,
Kristin Dana
[pdf] [arXiv]
[bibtex]
@InProceedings{Johnson_2026_CVPR, author = {Johnson, Faith and Cao, Bryan Bo and Jain, Shubham and Ashok, Ashwin and Dana, Kristin}, title = {A Simple Framework for Visual Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3167--3177} }

Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models
Yiwei You,
Zan Chen,
Bo Wang,
Xiaofei Zhou
[pdf] [supp]
[bibtex]
@InProceedings{You_2026_CVPR, author = {You, Yiwei and Chen, Zan and Wang, Bo and Zhou, Xiaofei}, title = {Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2324--2334} }

UniVerse3D: Emerging Properties of Unified Multimodal Models in 3D Understanding and Generation
Junliang Ye,
Zehuan Huang,
Yansong Qu,
Chunshi Wang,
Yunhan Yang,
Yang Li,
Yawei Luo,
Zhuo Chen,
Sheng Lu,
Jun Zhu,
Chunchao Guo
[pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Junliang and Huang, Zehuan and Qu, Yansong and Wang, Chunshi and Yang, Yunhan and Li, Yang and Luo, Yawei and Chen, Zhuo and Lu, Sheng and Zhu, Jun and Guo, Chunchao}, title = {{UniVerse3D}: Emerging Properties of Unified Multimodal Models in {3D} Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {613--623} }

Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday QR Codes
Ashish Kumar,
Aarthi S,
Akshay Agarwal
[pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashish and S, Aarthi and Agarwal, Akshay}, title = {Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday {QR} Codes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {857--866} }

Beyond 3D Geometry: M3FD, a Large-Scale Dataset and Benchmark for Multimodal 3D Perceptual Understanding
Huan Hu,
Ping Chen,
Zezhou Chen,
Zhaoxiang Liu,
Zipeng Wang,
Xiang Liu,
Xin Wang,
Kai Wang,
Shiguo Lian
[pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Huan and Chen, Ping and Chen, Zezhou and Liu, Zhaoxiang and Wang, Zipeng and Liu, Xiang and Wang, Xin and Wang, Kai and Lian, Shiguo}, title = {Beyond {3D} Geometry: {M3FD}, a Large-Scale Dataset and Benchmark for Multimodal {3D} Perceptual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1986--1995} }

FedCVC: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift
Jinshan Lai,
Tingxuan Huang,
Baoyang Jiang,
Liuyu Xiang,
Qiang Ma,
Jianwei Hu
[pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR, author = {Lai, Jinshan and Huang, Tingxuan and Jiang, Baoyang and Xiang, Liuyu and Ma, Qiang and Hu, Jianwei}, title = {{FedCVC}: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2514--2523} }

Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras
Hodaka Kawachi,
Tomoya Nakamura,
Hiroaki Santo,
SaiKiran Kumar Tedla,
Trevor D Canham,
Yasushi Yagi,
Michael S. Brown
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawachi_2026_CVPR, author = {Kawachi, Hodaka and Nakamura, Tomoya and Santo, Hiroaki and Tedla, SaiKiran Kumar and Canham, Trevor D and Yagi, Yasushi and Brown, Michael S.}, title = {Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1273--1282} }

AvatarMix: Identity-Preserving Cross-Avatar Composition for Outfit Personalization
Zhaorong Wang,
Yoshihiro Kanamori,
Yuki Endo
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Zhaorong and Kanamori, Yoshihiro and Endo, Yuki}, title = {{AvatarMix}: Identity-Preserving Cross-Avatar Composition for Outfit Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {425--435} }

DeepFakeShield: A Proactive Defense Against Malicious Face Swapping
Saeed Karimi-Bidhendi,
Joseph DeGol,
Eric Wengrowski,
Dominic Roberts,
Kristin Dana
[pdf] [supp]
[bibtex]
@InProceedings{Karimi-Bidhendi_2026_CVPR, author = {Karimi-Bidhendi, Saeed and DeGol, Joseph and Wengrowski, Eric and Roberts, Dominic and Dana, Kristin}, title = {{DeepFakeShield}: A Proactive Defense Against Malicious Face Swapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {867--877} }

FREE-Switch: Frequency-Based Dynamic LoRA Switch for Style Transfer
Shenghe Zheng,
Minyu Zhang,
Tianhao Liu,
Hongzhi Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Shenghe and Zhang, Minyu and Liu, Tianhao and Wang, Hongzhi}, title = {{FREE-Switch}: Frequency-Based Dynamic {LoRA} Switch for Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2793--2802} }

VEBench: Benchmarking Large Multimodal Models for Real-world Video Editing
Andong Deng,
Dawei Du,
Zhenfang Chen,
Wen Zhong,
Fan Chen,
Guang Chen,
Chia-Wen Kuo,
Longyin Wen,
Chen Chen,
Sijie Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR, author = {Deng, Andong and Du, Dawei and Chen, Zhenfang and Zhong, Wen and Chen, Fan and Chen, Guang and Kuo, Chia-Wen and Wen, Longyin and Chen, Chen and Zhu, Sijie}, title = {{VEBench}: Benchmarking Large Multimodal Models for Real-world Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2187--2196} }

Qinling-GFFE: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting
Zhenhe Liang,
Congqi Cao,
Lanshu Hu,
Liujie Pan
[pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Zhenhe and Cao, Congqi and Hu, Lanshu and Pan, Liujie}, title = {{Qinling-GFFE}: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2554--2563} }

ShelfGaussian: Shelf-Supervised Open-Vocabulary Gaussian-Based 3D Scene Understanding
Lingjun Zhao,
Yandong Luo,
James Hays,
Lu Gan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Lingjun and Luo, Yandong and Hays, James and Gan, Lu}, title = {{ShelfGaussian}: Shelf-Supervised Open-Vocabulary {Gaussian-Based} {3D} Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1398--1409} }

PEPR: Privileged Event-based Predictive Regularization for Domain Generalization
Gabriele Magrini,
Federico Becattini,
Niccolò Biondi,
Pietro Pala
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magrini_2026_CVPR, author = {Magrini, Gabriele and Becattini, Federico and Biondi, Niccol{\`o} and Pala, Pietro}, title = {{PEPR}: Privileged Event-based Predictive Regularization for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3209--3219} }

DRA: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting
Minwei Wen,
Yang Wei,
Junhao Xiao,
Xiuli Bi,
Bin Xiao
[pdf]
[bibtex]
@InProceedings{Wen_2026_CVPR, author = {Wen, Minwei and Wei, Yang and Xiao, Junhao and Bi, Xiuli and Xiao, Bin}, title = {{DRA}: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {779--788} }

CLLAP: Contrastive Learning-based LiDAR-Augmented Pretraining for Enhanced Radar-Camera Fusion
Bingyi Liu,
Chuanhui Zhu,
Hongfei Xue,
Jian Teng,
Jipeng Liu,
Enshu Wang,
Penglin Dai,
Pu Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Bingyi and Zhu, Chuanhui and Xue, Hongfei and Teng, Jian and Liu, Jipeng and Wang, Enshu and Dai, Penglin and Wang, Pu}, title = {{CLLAP}: Contrastive Learning-based {LiDAR-Augmented} Pretraining for Enhanced Radar-Camera Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {99--108} }

Devil is in Narrow Policy: Unleashing Exploration in Driving VLA Models
Canyu Chen,
Yuguang Yang,
Zhewen Tan,
Yizhi Wang,
Ruiyi Zhan,
Haiyan Liu,
Xuanyao Mao,
Jason Bao,
Xinyue Tang,
Linlin Yang,
Bingchuan Sun,
Yan Wang,
Baochang Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Canyu and Yang, Yuguang and Tan, Zhewen and Wang, Yizhi and Zhan, Ruiyi and Liu, Haiyan and Mao, Xuanyao and Bao, Jason and Tang, Xinyue and Yang, Linlin and Sun, Bingchuan and Wang, Yan and Zhang, Baochang}, title = {Devil is in Narrow Policy: Unleashing Exploration in Driving {VLA} Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1062--1072} }

APC: Transferable and Efficient Adversarial Point Counterattack for Robust 3D Point Cloud Recognition
Geunyoung Jung,
Soohong Kim,
Inseok Kong,
Jiyoung Jung
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR, author = {Jung, Geunyoung and Kim, Soohong and Kong, Inseok and Jung, Jiyoung}, title = {{APC}: Transferable and Efficient Adversarial Point Counterattack for Robust {3D} Point Cloud Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {789--798} }

Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation
Cong Li,
Gong Cheng
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Cong and Cheng, Gong}, title = {Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3040--3049} }

Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks
Yachan Guo,
Jose Lu Gómez,
Danna Xue,
Yi Xiao,
Antonio M. López
[pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Yachan and Lu G{\'o}mez, Jose and Xue, Danna and Xiao, Yi and L{\'o}pez, Antonio M.}, title = {Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3231--3240} }

AR4D: Autoregressive 4D Generation from Monocular Videos
Hanxin Zhu,
Tianyu He,
Zhibo Chen
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Hanxin and He, Tianyu and Chen, Zhibo}, title = {{AR4D}: Autoregressive {4D} Generation from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {88--98} }

HelixTrack: Event-Based Tracking and RPM Estimation of Propeller-like Objects
Radim Spetlik,
Michal Pliska,
Vojtěch Vrba,
Jiří Matas
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spetlik_2026_CVPR, author = {Spetlik, Radim and Pliska, Michal and Vrba, Vojt{\v{e}}ch and Matas, Ji{\v{r}}{\'\i}}, title = {{HelixTrack}: Event-Based Tracking and {RPM} Estimation of Propeller-like Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3200--3208} }

OmniDrive-R1: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving
Zhenguo Zhang,
Haohan Zheng,
Yishen Wang,
Le Xu,
Tianchen Deng,
Xuefeng Chen,
Qu Chen,
Bo Zhang,
Wuxiong Huang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhenguo and Zheng, Haohan and Wang, Yishen and Xu, Le and Deng, Tianchen and Chen, Xuefeng and Chen, Qu and Zhang, Bo and Huang, Wuxiong}, title = {{OmniDrive-R1}: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1106--1116} }

OminiMAG-SLAM: Unified Online Dual Graph Optimization for Multi-Agent Gaussian SLAM
Leqian Ding,
Caibo Li,
Yu Guo,
Fei Wang
[pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR, author = {Ding, Leqian and Li, Caibo and Guo, Yu and Wang, Fei}, title = {{OminiMAG-SLAM}: Unified Online Dual Graph Optimization for Multi-Agent {Gaussian} {SLAM}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1431--1440} }

Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction
Ziyao Guo,
Kaipeng Zhang,
Michael Qizhe Shieh
[pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Ziyao and Zhang, Kaipeng and Shieh, Michael Qizhe}, title = {Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1230--1239} }

PEARL: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception
Armin Maleki,
Hayder Radha
[pdf] [supp]
[bibtex]
@InProceedings{Maleki_2026_CVPR, author = {Maleki, Armin and Radha, Hayder}, title = {{PEARL}: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1138--1147} }

UNIFORM: Unifying Knowledge from Large-scale and Diverse Pre-trained Models
Yimu Wang,
Weiming Zhuang,
Chen Chen,
Jiabo Huang,
Jingtao Li,
Lingjuan Lyu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yimu and Zhuang, Weiming and Chen, Chen and Huang, Jiabo and Li, Jingtao and Lyu, Lingjuan}, title = {{UNIFORM}: Unifying Knowledge from Large-scale and Diverse Pre-trained Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2904--2914} }

Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light
Ali Hassani,
Fengzhe Zhou,
Aditya Kane,
Jiannan Huang,
Chieh-Yun Chen,
Min Shi,
Steven Walton,
Markus Hoehnerbach,
Vijay Thakkar,
Mikhail Isaev,
Qinsheng Zhang,
Bing Xu,
Haicheng Wu,
Wen-mei Hwu,
Ming-Yu Liu,
Humphrey Shi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassani_2026_CVPR, author = {Hassani, Ali and Zhou, Fengzhe and Kane, Aditya and Huang, Jiannan and Chen, Chieh-Yun and Shi, Min and Walton, Steven and Hoehnerbach, Markus and Thakkar, Vijay and Isaev, Mikhail and Zhang, Qinsheng and Xu, Bing and Wu, Haicheng and Hwu, Wen-mei and Liu, Ming-Yu and Shi, Humphrey}, title = {Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3009--3018} }

SciGA: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers
Takuro Kawada,
Shunsuke Kitada,
Sota Nemoto,
Hitoshi Iyatomi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawada_2026_CVPR, author = {Kawada, Takuro and Kitada, Shunsuke and Nemoto, Sota and Iyatomi, Hitoshi}, title = {{SciGA}: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2250--2260} }

InEdit-Bench: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models
Zhiqiang Sheng,
Xumeng Han,
Zhiwei Zhang,
Zenghui Xiong,
Yifan Ding,
Aoxiang Ping,
Xiang Li,
Tong Guo,
Yao Mao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR, author = {Sheng, Zhiqiang and Han, Xumeng and Zhang, Zhiwei and Xiong, Zenghui and Ding, Yifan and Ping, Aoxiang and Li, Xiang and Guo, Tong and Mao, Yao}, title = {{InEdit-Bench}: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2176--2186} }

RiGS: Rigid-aware 4D Gaussian Splatting from a Single Monocular Video
Chenyu Wu,
Wanhua Li,
Chen Zhu-Tian,
Hanspeter Pfister
[pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Chenyu and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter}, title = {{RiGS}: Rigid-aware {4D} {Gaussian} Splatting from a Single Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {547--557} }

JetViT: Efficient High-Resolution Vision Transformer with Post-Training Attention Search
Dongyun Zou,
Zhuoyang Zhang,
Junyu Chen,
Wenkun He,
Qinhe Peng,
Hanrong Ye,
Yao Lu,
Hongxu Yin,
Yu Wang,
Song Han,
Han Cai
[pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR, author = {Zou, Dongyun and Zhang, Zhuoyang and Chen, Junyu and He, Wenkun and Peng, Qinhe and Ye, Hanrong and Lu, Yao and Yin, Hongxu and Wang, Yu and Han, Song and Cai, Han}, title = {{JetViT}: Efficient High-Resolution Vision Transformer with Post-Training Attention Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2957--2967} }

Three-Step Conditional Diffusion 3D Reconstruction for Light-Field Microscopy
Qihong Zhao,
Shaokang Yan,
Zhimin Qiao,
Jinjia Wang,
Bo Xiong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qihong and Yan, Shaokang and Qiao, Zhimin and Wang, Jinjia and Xiong, Bo}, title = {Three-Step Conditional Diffusion {3D} Reconstruction for Light-Field Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {478--487} }

Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation
Chenhao Shi,
Yichen Zhu,
Junjie Wen,
Yefei Chen,
Ziang Liu,
Faming Fang
[pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Chenhao and Zhu, Yichen and Wen, Junjie and Chen, Yefei and Liu, Ziang and Fang, Faming}, title = {Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1388--1397} }

FedVG: Gradient-Guided Aggregation for Enhanced Federated Learning
Alina Devkota,
Jacob Thrasher,
Donald Adjeroh,
Binod Bhattarai,
Prashnna K. Gyawali
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Devkota_2026_CVPR, author = {Devkota, Alina and Thrasher, Jacob and Adjeroh, Donald and Bhattarai, Binod and Gyawali, Prashnna K.}, title = {{FedVG}: Gradient-Guided Aggregation for Enhanced Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2803--2812} }

iTCTSL: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning
Pan Mu,
Yuchao Zhu,
Shiqi Zhang,
Hanting Yan,
Jinglin Zhang,
Cong Bai
[pdf] [supp]
[bibtex]
@InProceedings{Mu_2026_CVPR, author = {Mu, Pan and Zhu, Yuchao and Zhang, Shiqi and Yan, Hanting and Zhang, Jinglin and Bai, Cong}, title = {{iTCTSL}: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1587--1596} }

Native3D: End-to-End 3D Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment
Yibo Liu,
Ziwei Zhang,
Haozhou Pang,
Menghao Li,
Lanshan He,
Gan Qi
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yibo and Zhang, Ziwei and Pang, Haozhou and Li, Menghao and He, Lanshan and Qi, Gan}, title = {{Native3D}: End-to-End {3D} Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {381--390} }

Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition
Zhuohao Chen,
Zeng Li,
Yifei Zhang,
Chang Liu,
Yu Zhou
[pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuohao and Li, Zeng and Zhang, Yifei and Liu, Chang and Zhou, Yu}, title = {Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1577--1586} }

Retrieval-VLA: Training-Free In-Context Adaptation for Vision-Language-Action Models
Yue Zhang,
Rui Wang,
Jiehong Lin,
Zhongrui Wang,
Xiaojuan Qi
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yue and Wang, Rui and Lin, Jiehong and Wang, Zhongrui and Qi, Xiaojuan}, title = {{Retrieval-VLA}: Training-Free In-Context Adaptation for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1358--1367} }

SwiftVGGT: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes
Jungho Lee,
Minhyeok Lee,
Sunghun Yang,
Minseok Kang,
Sangyoun Lee
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Jungho and Lee, Minhyeok and Yang, Sunghun and Kang, Minseok and Lee, Sangyoun}, title = {{SwiftVGGT}: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {447--456} }

Affine Bases for Affine Spaces
Gabriel Dogadov,
Marc Alexa
[pdf] [supp]
[bibtex]
@InProceedings{Dogadov_2026_CVPR, author = {Dogadov, Gabriel and Alexa, Marc}, title = {Affine Bases for Affine Spaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {213--222} }

A2Z-10M+: Geometric Deep Learning with A-to-Z BRep Annotations for AI-Assisted CAD Modeling and Reverse Engineering
Pritham K Jena,
Bhavika Baburaj,
Tushar Anand,
Vedant Dutta,
Vineeth Ulavala,
Sk Aziz Ali
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jena_2026_CVPR, author = {Jena, Pritham K and Baburaj, Bhavika and Anand, Tushar and Dutta, Vedant and Ulavala, Vineeth and Ali, Sk Aziz}, title = {{A2Z-10M+}: Geometric Deep Learning with {A-to-Z} {BRep} Annotations for {AI-Assisted} {CAD} Modeling and Reverse Engineering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1913--1923} }

CoRT-Predictor: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving
Yanlin Jiang,
Yuchen Liu,
Mingren Liu
[pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yanlin and Liu, Yuchen and Liu, Mingren}, title = {{CoRT-Predictor}: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1117--1127} }

The DeepSpeak Dataset
Sarah Barrington,
Maty Bohacek,
Hany Farid
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barrington_2026_CVPR, author = {Barrington, Sarah and Bohacek, Maty and Farid, Hany}, title = {The {DeepSpeak} Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1893--1902} }

ELSA: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers
Chih-Chung Hsu,
Xin-Di Ma,
Wo-Ting Liao,
Chia-Ming Lee
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsu_2026_CVPR, author = {Hsu, Chih-Chung and Ma, Xin-Di and Liao, Wo-Ting and Lee, Chia-Ming}, title = {{ELSA}: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2988--2997} }

When Interpretability Becomes a Liability: Adversarial Attacks on CBM Concept Layers
Aditya Sridhar
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sridhar_2026_CVPR, author = {Sridhar, Aditya}, title = {When Interpretability Becomes a Liability: Adversarial Attacks on {CBM} Concept Layers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {829--836} }

Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors
Vishal Purohit,
Wei Chen,
Qiang Qiu
[pdf] [supp]
[bibtex]
@InProceedings{Purohit_2026_CVPR, author = {Purohit, Vishal and Chen, Wei and Qiu, Qiang}, title = {Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1263--1272} }

Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving
Xuepei Yang,
Mingtao Feng,
Weisheng Dong,
Lin Chen,
Jie Feng,
Fangfang Wu,
Yufan Zhu,
Ajmal Saeed Mian
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Xuepei and Feng, Mingtao and Dong, Weisheng and Chen, Lin and Feng, Jie and Wu, Fangfang and Zhu, Yufan and Mian, Ajmal Saeed}, title = {Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {942--951} }

Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding
Jeongwan Shin,
Jaehyeon Kim,
Donguk Ko,
Jaeho Choi
[pdf] [supp]
[bibtex]
@InProceedings{Shin_2026_CVPR, author = {Shin, Jeongwan and Kim, Jaehyeon and Ko, Donguk and Choi, Jaeho}, title = {Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2208-2219} }

FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views
Chaoyi Zhou,
Run Wang,
Feng Luo,
Mert D. Pesé,
Zhiwen Fan,
Yiqi Zhong,
Siyu Huang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Chaoyi and Wang, Run and Luo, Feng and Pes{\'e}, Mert D. and Fan, Zhiwen and Zhong, Yiqi and Huang, Siyu}, title = {FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {129-138} }

HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes
Mauricio Soroco,
Francesco Pittaluga,
Zaid Tasneem,
Abhishek Aich,
Bingbing Zhuang,
Wuyang Chen,
Manmohan Chandraker,
Ziyu Jiang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soroco_2026_CVPR, author = {Soroco, Mauricio and Pittaluga, Francesco and Tasneem, Zaid and Aich, Abhishek and Zhuang, Bingbing and Chen, Wuyang and Chandraker, Manmohan and Jiang, Ziyu}, title = {HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {952-959} }

CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation
Pingrui Lai,
Yanshan Zhou,
Zihao Xie,
Hua Yang
[pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR, author = {Lai, Pingrui and Zhou, Yanshan and Xie, Zihao and Yang, Hua}, title = {CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2197-2207} }

Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game
Jeonghwan Kim,
Wontaek Kim,
Yidan Lu,
Jin Cheng,
Fatemeh Zargarbashi,
Zicheng Zeng,
Zekun Qi,
Zhiyang Dou,
Nitish Sontakke,
Donghoon Baek,
Li Yi,
Sehoon Ha,
Tianyu Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Jeonghwan and Kim, Wontaek and Lu, Yidan and Cheng, Jin and Zargarbashi, Fatemeh and Zeng, Zicheng and Qi, Zekun and Dou, Zhiyang and Sontakke, Nitish and Baek, Donghoon and Yi, Li and Ha, Sehoon and Li, Tianyu}, title = {Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1421-1430} }

When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI
Yanhui Li,
Qi Zhou,
Zhihong Xu,
Huizhong Guo,
Wenhai Wang,
Dongxia Wang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yanhui and Zhou, Qi and Xu, Zhihong and Guo, Huizhong and Wang, Wenhai and Wang, Dongxia}, title = {When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2038-2048} }

See Tomorrow, Act Today: Foresight-Driven Autonomous Driving
Bozhou Zhang,
Nan Song,
Yuang Wang,
Jiankang Deng,
Xiatian Zhu,
Li Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bozhou and Song, Nan and Wang, Yuang and Deng, Jiankang and Zhu, Xiatian and Zhang, Li}, title = {See Tomorrow, Act Today: Foresight-Driven Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1180-1190} }

GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking
Zekun Qian,
Ruize Han,
Zhixiang Wang,
Liang Wan,
Wei Feng
[pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR, author = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Wan, Liang and Feng, Wei}, title = {GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1872-1882} }

Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI
Mingjie Li,
Edward Kim,
Yue Zhao,
Ehsan Adeli,
Kilian M. Pohl
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Mingjie and Kim, Edward and Zhao, Yue and Adeli, Ehsan and Pohl, Kilian M.}, title = {Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1211-1220} }

RoadTones: Tone Controllable Text Generation from Road Event Videos
Chirag Parikh,
Siddhi Pravin Lipare,
Ravi Kiran Sarvadevabhatla
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parikh_2026_CVPR, author = {Parikh, Chirag and Lipare, Siddhi Pravin and Sarvadevabhatla, Ravi Kiran}, title = {RoadTones: Tone Controllable Text Generation from Road Event Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1019-1028} }

SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts
Shun Inadumi,
Shohei Tanaka,
Tosho Hirasawa,
Atsushi Hashimoto,
Koichiro Yoshino,
Yoshitaka Ushiku
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Inadumi_2026_CVPR, author = {Inadumi, Shun and Tanaka, Shohei and Hirasawa, Tosho and Hashimoto, Atsushi and Yoshino, Koichiro and Ushiku, Yoshitaka}, title = {SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2131-2141} }

Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction
Chen Ziwen,
Hao Tan,
Peng Wang,
Zexiang Xu,
Li Fuxin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziwen_2026_CVPR, author = {Ziwen, Chen and Tan, Hao and Wang, Peng and Xu, Zexiang and Fuxin, Li}, title = {Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {370-380} }

Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding
Wanying Zhou,
Zhuo Chen,
Jianzhi Lu,
Chenxi Ma,
Weimin Tan,
Bo Yan
[pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Wanying and Chen, Zhuo and Lu, Jianzhi and Ma, Chenxi and Tan, Weimin and Yan, Bo}, title = {Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2721-2730} }

HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion
Aihua Mao,
Jun Yang,
Yong-Jin Liu,
Ying He
[pdf]
[bibtex]
@InProceedings{Mao_2026_CVPR, author = {Mao, Aihua and Yang, Jun and Liu, Yong-Jin and He, Ying}, title = {HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {149-159} }

DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework
Yani Zhang,
Dongming Wu,
Hao Shi,
Yingfei Liu,
Tiancai Wang,
Xingping Dong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yani and Wu, Dongming and Shi, Hao and Liu, Yingfei and Wang, Tiancai and Dong, Xingping}, title = {DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3103-3113} }

Shape and Texture Recognition in Large Vision-Language Models
Sagi Eppel,
Mor Bismut,
Alona Strugatski
[pdf] [supp]
[bibtex]
@InProceedings{Eppel_2026_CVPR, author = {Eppel, Sagi and Bismut, Mor and Strugatski, Alona}, title = {Shape and Texture Recognition in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1839-1849} }

TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution
Honghui Xu,
Chuangjie Fang,
Yiqun Meng,
Jiawei Jiang,
Sixian Chan,
Shiqing Zhang,
Jianwei Zheng
[pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Honghui and Fang, Chuangjie and Meng, Yiqun and Jiang, Jiawei and Chan, Sixian and Zhang, Shiqing and Zheng, Jianwei}, title = {TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1670-1679} }

An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning
Pengzhou Chen,
Qiling Tang,
XinYu Chai,
Rong Liu,
Zhi Li,
Liman Liu
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Pengzhou and Tang, Qiling and Chai, XinYu and Liu, Rong and Li, Zhi and Liu, Liman}, title = {An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3241-3251} }

M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?
Haolong Yan,
Kaijun Tan,
Yeqing Shen,
Xin Huang,
Jia Wang,
Zheng Ge,
Xiangyu Zhang,
Si Li,
Daxin Jiang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, Haolong and Tan, Kaijun and Shen, Yeqing and Huang, Xin and Wang, Jia and Ge, Zheng and Zhang, Xiangyu and Li, Si and Jiang, Daxin}, title = {M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2731-2741} }

Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection
Wenbing Zhu,
Jianing Liang,
Linjie Cheng,
Yurui Pan,
Zhuhao Chen,
Qingwang Yan,
Yudong Cheng,
Jianghui Zhang,
Mingmin Chi,
Bo Peng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Wenbing and Liang, Jianing and Cheng, Linjie and Pan, Yurui and Chen, Zhuhao and Yan, Qingwang and Cheng, Yudong and Zhang, Jianghui and Chi, Mingmin and Peng, Bo}, title = {Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2060-2068} }

AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation
Yulu Wu,
Jiujun Cheng,
Haowen Wang,
Dengyang Suo,
Pei Ren,
Qichao Mao,
Shangce Gao,
Yakun Huang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Yulu and Cheng, Jiujun and Wang, Haowen and Suo, Dengyang and Ren, Pei and Mao, Qichao and Gao, Shangce and Huang, Yakun}, title = {AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3082-3091} }

PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation
Xiangyu Li,
Chen Wang,
Yumao Liu,
Dengbo He,
Jiahao Zhang,
Ke Ma
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xiangyu and Wang, Chen and Liu, Yumao and He, Dengbo and Zhang, Jiahao and Ma, Ke}, title = {PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1010-1018} }

OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation
Zhishan Zhou,
Siyuan Wei,
Zengran Wang,
Chunjie Wang,
Xiaosheng Yan,
Xiao Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhishan and Wei, Siyuan and Wang, Zengran and Wang, Chunjie and Yan, Xiaosheng and Liu, Xiao}, title = {OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {233-242} }

RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning
Jingqi Xu,
Jingxi Lu,
Chenghao Li,
Sreetama Sarkar,
Souvik Kundu,
Peter A Beerel
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Jingqi and Lu, Jingxi and Li, Chenghao and Sarkar, Sreetama and Kundu, Souvik and Beerel, Peter A}, title = {RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2783-2792} }

Tiny Inference-Time Scaling with Latent Verifiers
Davide Bucciarelli,
Evelyn Turri,
Lorenzo Baraldi,
Marcella Cornia,
Lorenzo Baraldi,
Rita Cucchiara
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bucciarelli_2026_CVPR, author = {Bucciarelli, Davide and Turri, Evelyn and Baraldi, Lorenzo and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Tiny Inference-Time Scaling with Latent Verifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2873-2882} }

THEval. Evaluation Framework for Talking Head Video Generation
Nabyl Quignon,
Baptiste Chopin,
Yaohui Wang,
Antitza Dantcheva
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quignon_2026_CVPR, author = {Quignon, Nabyl and Chopin, Baptiste and Wang, Yaohui and Dantcheva, Antitza}, title = {THEval. Evaluation Framework for Talking Head Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1943-1953} }

Jailbreaking Frontier Foundation Models Through Intention Deception
Xinhe Wang,
Katia Sycara,
Yaqi Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xinhe and Sycara, Katia and Xie, Yaqi}, title = {Jailbreaking Frontier Foundation Models Through Intention Deception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {666-674} }

Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes
Weihang Ran,
Qingtian Zhu,
Mingdeng Cao,
Wei Yuan,
Isao Echizen,
Yinqiang Zheng
[pdf] [supp]
[bibtex]
@InProceedings{Ran_2026_CVPR, author = {Ran, Weihang and Zhu, Qingtian and Cao, Mingdeng and Yuan, Wei and Echizen, Isao and Zheng, Yinqiang}, title = {Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1597-1607} }

Rethinking Compact (<1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions
Christos Kyrkou
[pdf] [supp]
[bibtex]
@InProceedings{Kyrkou_2026_CVPR, author = {Kyrkou, Christos}, title = {Rethinking Compact (\ensuremath{<}1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2710-2720} }

Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark
Rajmund Nagy,
Hendric Voss,
Thanh Hoang-Minh,
Mihail Tsakov,
Teodor Nikolov,
Zeyi Zhang,
Tenglong Ao,
Sicheng Yang,
Shaoli Huang,
Yongkang Cheng,
M. Hamza Mughal,
Rishabh Dabral,
Kiran Chhatre,
Christian Theobalt,
Libin Liu,
Stefan Kopp,
Rachel McDonnell,
Michael Neff,
Taras Kucherenko,
Youngwoo Yoon,
Gustav Eje Henter
[pdf] [supp]
[bibtex]
@InProceedings{Nagy_2026_CVPR, author = {Nagy, Rajmund and Voss, Hendric and Hoang-Minh, Thanh and Tsakov, Mihail and Nikolov, Teodor and Zhang, Zeyi and Ao, Tenglong and Yang, Sicheng and Huang, Shaoli and Cheng, Yongkang and Mughal, M. Hamza and Dabral, Rishabh and Chhatre, Kiran and Theobalt, Christian and Liu, Libin and Kopp, Stefan and McDonnell, Rachel and Neff, Michael and Kucherenko, Taras and Yoon, Youngwoo and Henter, Gustav Eje}, title = {Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2152-2164} }

Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction
Xuanming Jiang,
Baoyi An,
Dingyu Nie,
Haoyu Ren,
Zhengwei Zou,
Yizhe Yang,
Jialie Shen,
Zhiwen Jin,
Xueming Qian,
Zhongyu Yang,
Guoshuai Zhao
[pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xuanming and An, Baoyi and Nie, Dingyu and Ren, Haoyu and Zou, Zhengwei and Yang, Yizhe and Shen, Jialie and Jin, Zhiwen and Qian, Xueming and Yang, Zhongyu and Zhao, Guoshuai}, title = {Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1608-1617} }

SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses
Zhuohang Jiang,
Xu Yuan,
Haohao Qu,
Shanru Lin,
Kanglong Liu,
Wenqi Fan,
Li Qing
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhuohang and Yuan, Xu and Qu, Haohao and Lin, Shanru and Liu, Kanglong and Fan, Wenqi and Qing, Li}, title = {SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2165-2175} }

MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling
Junming Zhang,
Yifei Ji,
Yongxuan Han,
Zhenzhe Zheng
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junming and Ji, Yifei and Han, Yongxuan and Zheng, Zhenzhe}, title = {MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2926-2936} }

Catalyst: Out-of-Distribution Detection via Elastic Scaling
Abid Hassan,
Tuan Ngo,
Saad Shafiq,
Nenad Medvidovic
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassan_2026_CVPR, author = {Hassan, Abid and Ngo, Tuan and Shafiq, Saad and Medvidovic, Nenad}, title = {Catalyst: Out-of-Distribution Detection via Elastic Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1618-1628} }

Multimodal Large Language Models as Image Classifiers
Nikita Kisel,
Illia Volkov,
Klara Janouskova,
Jiri Matas
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kisel_2026_CVPR, author = {Kisel, Nikita and Volkov, Illia and Janouskova, Klara and Matas, Jiri}, title = {Multimodal Large Language Models as Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1711-1720} }

PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks
Jie Guo,
JunXiang Wu,
Nan An,
Zhen Zhang,
Shuiying Xiang,
Mingjin Zhang,
Yunsong Li,
Yu'e Gao
[pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Jie and Wu, JunXiang and An, Nan and Zhang, Zhen and Xiang, Shuiying and Zhang, Mingjin and Li, Yunsong and Gao, Yu'e}, title = {PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2367-2376} }

SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes
Fudong Ge,
Dingning Liu,
Hanshi Wang,
Yiwei Zhang,
Jin Gao,
Weiming Hu,
Zhipeng Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Ge_2026_CVPR, author = {Ge, Fudong and Liu, Dingning and Wang, Hanshi and Zhang, Yiwei and Gao, Jin and Hu, Weiming and Zhang, Zhipeng}, title = {SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {900-909} }

CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery
Tung-I Chen,
Lingdong Wang,
Subhransu Maji,
Ramesh K. Sitaraman
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Tung-I and Wang, Lingdong and Maji, Subhransu and Sitaraman, Ramesh K.}, title = {CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {457-467} }

Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data
Ji Woong Kim,
Ke Wang,
Zipeng Fu,
Sirui Chen,
Cong Zhao,
Jeff Lai,
Chelsea Finn
[pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Ji Woong and Wang, Ke and Fu, Zipeng and Chen, Sirui and Zhao, Cong and Lai, Jeff and Finn, Chelsea}, title = {Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1515-1524} }

SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters
Shohei Tanaka,
Atsushi Hashimoto,
Yoshitaka Ushiku
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanaka_2026_CVPR, author = {Tanaka, Shohei and Hashimoto, Atsushi and Ushiku, Yoshitaka}, title = {SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2753-2762} }

Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs
Mona Gandhi,
K.J. Joseph,
Srinivasan Parthasarathy,
Sayan Nag
[pdf] [supp]
[bibtex]
@InProceedings{Gandhi_2026_CVPR, author = {Gandhi, Mona and Joseph, K. J. and Parthasarathy, Srinivasan and Nag, Sayan}, title = {Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1786-1796} }

Learning a Particle Dynamics Model with Real-World Videos
Chanho Kim,
Suhas V. Sumukh,
Li Fuxin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Chanho and Sumukh, Suhas V. and Fuxin, Li}, title = {Learning a Particle Dynamics Model with Real-World Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {498-507} }

Guided Lensless Polarization Imaging
Noa Kraicer,
Erez Yosef,
Raja Giryes
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kraicer_2026_CVPR, author = {Kraicer, Noa and Yosef, Erez and Giryes, Raja}, title = {Guided Lensless Polarization Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1252-1262} }

DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks
Jacob Piland,
Christopher Sweet,
Adam Czajka
[pdf] [arXiv]
[bibtex]
@InProceedings{Piland_2026_CVPR, author = {Piland, Jacob and Sweet, Christopher and Czajka, Adam}, title = {DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1201-1210} }

dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning
Yingzi Ma,
Yulong Cao,
Wenhao Ding,
Shuibai Zhang,
Yan Wang,
Boris Ivanovic,
Ming Jiang,
Marco Pavone,
Chaowei Xiao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Yingzi and Cao, Yulong and Ding, Wenhao and Zhang, Shuibai and Wang, Yan and Ivanovic, Boris and Jiang, Ming and Pavone, Marco and Xiao, Chaowei}, title = {dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1050-1061} }

GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design
Xiaolei Zhou,
Chuangjie Fang,
Jie Wu,
Jingyi Yang,
Boyi Lin,
Jianwei Zheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xiaolei and Fang, Chuangjie and Wu, Jie and Yang, Jingyi and Lin, Boyi and Zheng, Jianwei}, title = {GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {243-252} }

Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement
Pu Li,
Huafeng Li,
Yafei Zhang,
Yu Liu,
Wen Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Pu and Li, Huafeng and Zhang, Yafei and Liu, Yu and Wang, Wen}, title = {Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1304-1313} }

CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models
Fawaz Sammani,
Jonas Fischer,
Nikos Deligiannis
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sammani_2026_CVPR, author = {Sammani, Fawaz and Fischer, Jonas and Deligiannis, Nikos}, title = {CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3262-3272} }

From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images
Fei Yu,
Yu Liu,
Luyang Tang,
Mingchao Sun,
Zengye Ge,
Rui Bu,
Yuchao Jin,
Haisen Zhao,
He Sun,
Yangyan Li,
Mu Xu,
Wenzheng Chen,
Baoquan Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Fei and Liu, Yu and Tang, Luyang and Sun, Mingchao and Ge, Zengye and Bu, Rui and Jin, Yuchao and Zhao, Haisen and Sun, He and Li, Yangyan and Xu, Mu and Chen, Wenzheng and Chen, Baoquan}, title = {From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {391-402} }

From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models
Yukang Feng,
Wenxiao Wu,
Jianwen Sun,
Chuanhao Li,
Fanrui Zhang,
Zizhen Li,
Jiaxin Ai,
Sizhuo Zhou,
Yifan Chang,
Changxin Gao,
Shenglin Zhang,
Kaipeng Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Yukang and Wu, Wenxiao and Sun, Jianwen and Li, Chuanhao and Zhang, Fanrui and Li, Zizhen and Ai, Jiaxin and Zhou, Sizhuo and Chang, Yifan and Gao, Changxin and Zhang, Shenglin and Zhang, Kaipeng}, title = {From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2220-2229} }

M^3A Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering
Jiayi Li,
Yuxuan Hu,
Haoran Geng,
Xiangyu Chen,
Chuhao Zhou,
Ziteng Cui,
Jianfei Yang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Hu, Yuxuan and Geng, Haoran and Chen, Xiangyu and Zhou, Chuhao and Cui, Ziteng and Yang, Jianfei}, title = {{M$^3$A} Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3070-3081} }

Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction
Xian-Hua Han
[pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Xian-Hua}, title = {Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1294-1303} }

WGS: Watertight Geometry Standardization for Scalable 3D Generation
Dehao Hao,
Tanghui Jia,
Kaiyi Zhang,
Weikai Chen,
Zeyu Hu,
Yingda Yin,
Runze Zhang,
Lingting Zhu,
Li Yuan,
Xin Wang,
Long Quan
[pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR, author = {Hao, Dehao and Jia, Tanghui and Zhang, Kaiyi and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Zhang, Runze and Zhu, Lingting and Yuan, Li and Wang, Xin and Quan, Long}, title = {WGS: Watertight Geometry Standardization for Scalable 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {569-578} }

ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning
Boran Wang,
Xinming Wang,
Yi Chen,
Xiang Li,
Jian Xu,
Jing Yuan,
Cheng-Lin Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Boran and Wang, Xinming and Chen, Yi and Li, Xiang and Xu, Jian and Yuan, Jing and Liu, Cheng-Lin},
  title     = {{ChartAgent}: A Chart Understanding Framework with Tool Integrated Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2773--2782},
}

Dual Strategies for Test-Time Adaptation
Nam Nguyen Phuong,
Duc Nguyen The Minh,
Phi Le Nguyen,
Ehsan Abbasnejad,
Minh Hoai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phuong_2026_CVPR,
  author    = {Phuong, Nam Nguyen and Minh, Duc Nguyen The and Le Nguyen, Phi and Abbasnejad, Ehsan and Hoai, Minh},
  title     = {Dual Strategies for Test-Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2483--2492},
}

CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion
Akshit Jindal,
Saket Anand,
Chetan Arora,
Vikram Goyal
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jindal_2026_CVPR,
  author    = {Jindal, Akshit and Anand, Saket and Arora, Chetan and Goyal, Vikram},
  title     = {{CLIP-Inspector}: Model-Level Backdoor Detection for Prompt-Tuned {CLIP} via {OOD} Trigger Inversion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {716--725},
}

The Mechanics of CNN Filtering with Rectification
Liam Frija-Altarac,
Matthew Toews
[pdf] [supp]
[bibtex]
@InProceedings{Frija-Altarac_2026_CVPR,
  author    = {Frija-Altarac, Liam and Toews, Matthew},
  title     = {The Mechanics of {CNN} Filtering with Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1690--1699},
}

AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging
Zuchi Bazarvaani,
Seung-Ho Lee,
Jeongmin Ahn,
Donghyeon Jeon,
Inho Kang,
Seung-Hoon Na
[pdf]
[bibtex]
@InProceedings{Bazarvaani_2026_CVPR,
  author    = {Bazarvaani, Zuchi and Lee, Seung-Ho and Ahn, Jeongmin and Jeon, Donghyeon and Kang, Inho and Na, Seung-Hoon},
  title     = {{AlphaMerging}: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2700--2709},
}

Re^2MoGen: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement
Jiakun Zheng,
Ting Xiao,
Shiqin Cao,
Xinran Li,
Zhe Wang,
Chenjia Bai
[pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jiakun and Xiao, Ting and Cao, Shiqin and Li, Xinran and Wang, Zhe and Bai, Chenjia},
  title     = {{Re$^{2}$MoGen}: Open-Vocabulary Motion Generation via {LLM} Reasoning and Physics-Aware Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1378--1387},
}

Fast Generative DeOcclusion for Visual Geometry and Robotics
Jieneng Chen,
Tiezheng Zhang,
Xiwei Xuan,
Ju He,
Yifan Yin,
Haojun Shi,
Suyu Ye,
Xinyi Li,
Ruisheng Yuan,
Tianmin Shu,
Alan Yuille
[pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jieneng and Zhang, Tiezheng and Xuan, Xiwei and He, Ju and Yin, Yifan and Shi, Haojun and Ye, Suyu and Li, Xinyi and Yuan, Ruisheng and Shu, Tianmin and Yuille, Alan},
  title     = {Fast Generative {DeOcclusion} for Visual Geometry and Robotics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1314--1324},
}

LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map
Wei Luo,
Xiaohan Wang,
Yuehu Liu
[pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wei and Wang, Xiaohan and Liu, Yuehu},
  title     = {{LP3}: {LLM-based} Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1462--1471},
}

VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction
Yu Hu,
Chong Cheng,
Sicheng Yu,
Xiaoyang Guo,
Hao Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yu and Cheng, Chong and Yu, Sicheng and Guo, Xiaoyang and Wang, Hao},
  title     = {{VGGT4D}: Mining Motion Cues in Visual Geometry Transformers for {4D} Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {414--424},
}

Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams
Kaiyuan Chen,
Shuangyu Xie,
Andrew Goldberg,
Ken Goldberg
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Kaiyuan and Xie, Shuangyu and Goldberg, Andrew and Goldberg, Ken},
  title     = {Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1452--1461},
}

Object Pose Transformer: Unifying Unseen Object Pose Estimation
Weihang Li,
Lorenzo Garattoni,
Fabien Despinoy,
Nassir Navab,
Benjamin Busam
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weihang and Garattoni, Lorenzo and Despinoy, Fabien and Navab, Nassir and Busam, Benjamin},
  title     = {Object Pose Transformer: Unifying Unseen Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {436--446},
}

Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance
Junyang Chen,
Haomin Ni,
Hanjiang Lai
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junyang and Ni, Haomin and Lai, Hanjiang},
  title     = {Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {819--828},
}

Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining
Jie Xu,
Na Zhao
[pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jie and Zhao, Na},
  title     = {{Stream3D}: Streaming Zero-Shot {3D} Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {327--337},
}

Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments
Chihiro Noguchi,
Takaki Yamamoto
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noguchi_2026_CVPR,
  author    = {Noguchi, Chihiro and Yamamoto, Takaki},
  title     = {Pseudo-Expert Regularized Offline {RL} for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1096--1105},
}

Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media
Megha Mariam K.M,
Vineeth N. Balasubramanian,
C.V. Jawahar
[pdf] [supp]
[bibtex]
@InProceedings{K.M_2026_CVPR,
  author    = {K. M., Megha Mariam and Balasubramanian, Vineeth N. and Jawahar, C. V.},
  title     = {Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2079--2088},
}

Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering
Powei Liao,
Jiro Abe,
Kazumine Ogura
[pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Powei and Abe, Jiro and Ogura, Kazumine},
  title     = {{Point2Gaussian}: {Point-Cloud-to-Gaussian} Conversion for Efficient {3D} Scene Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {109--118},
}

Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration
Yujie Chen,
Haotong Qin,
Zhang Zhang,
Michele Magno,
Luca Benini,
Yawei Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yujie and Qin, Haotong and Zhang, Zhang and Magno, Michele and Benini, Luca and Li, Yawei},
  title     = {{Q-MambaIR}: Accurate Quantized {Mamba} for Efficient Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2524--2533},
}

Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP
Yusung Ro,
Jaehyun Choi,
Junmo Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ro_2026_CVPR,
  author    = {Ro, Yusung and Choi, Jaehyun and Kim, Junmo},
  title     = {Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3252--3261},
}

Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs
Rongqian Chen,
Allison Andreyev,
Yanming Xiu,
Joshua Chilukuri,
Shunav Sen,
Mahdi Imani,
Bin Li,
Maria Gorlatova,
Gang Tan,
Tian Lan
[pdf]
[bibtex]
@InProceedings{chen_2026_CVPR,
  author    = {Chen, Rongqian and Andreyev, Allison and Xiu, Yanming and Chilukuri, Joshua and Sen, Shunav and Imani, Mahdi and Li, Bin and Gorlatova, Maria and Tan, Gang and Lan, Tian},
  title     = {Cognitive Attack Detection in Augmented Reality ({CADAR}): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {799--808},
}

WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting
Lezhong Wang,
Mehmet Onurcan Kaya,
Siavash Arjomand Bigdeli,
Jeppe Revall Frisvad
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lezhong and Kaya, Mehmet Onurcan and Bigdeli, Siavash Arjomand and Frisvad, Jeppe Revall},
  title     = {{WildRelight}: A Real-World Dataset and Benchmark for Single-Image Relighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2007--2016},
}

Context-Aware Semantic Segmentation via Stage-Wise Attention
Antoine Carreaud,
Elias Naha,
Arthur Chansel,
Nina Lahellec,
Jan Skaloud,
Adrien Gressin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Carreaud_2026_CVPR,
  author    = {Carreaud, Antoine and Naha, Elias and Chansel, Arthur and Lahellec, Nina and Skaloud, Jan and Gressin, Adrien},
  title     = {Context-Aware Semantic Segmentation via Stage-Wise Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2680--2690},
}

Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach
Xincheng Wang,
Hanchi Sun,
Wenjun Sun,
Kejun Xue,
Wangqiu Zhou,
Jianbo Zhang,
Wei Sun,
Dandan Zhu,
Xiongkuo Min,
Jun Jia,
Zhijun Fang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xincheng and Sun, Hanchi and Sun, Wenjun and Xue, Kejun and Zhou, Wangqiu and Zhang, Jianbo and Sun, Wei and Zhu, Dandan and Min, Xiongkuo and Jia, Jun and Fang, Zhijun},
  title     = {Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2230--2239},
}

Paper2SysArch: Structure-Constrained System Architecture Generation from Scientific Papers
Ziyi Guo,
Zhou Liu,
Wentao Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyi and Liu, Zhou and Zhang, Wentao},
  title     = {{Paper2SysArch}: Structure-Constrained System Architecture Generation from Scientific Papers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1996--2006},
}

OutSafe-Bench: A Benchmark for Multimodal Offensive Content Detection in Large Language Models
Yuping Yan,
Yuhan Xie,
Yuanshuai Li,
Yingchao Yu,
Lingjuan Lyu,
Yaochu Jin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yuping and Xie, Yuhan and Li, Yuanshuai and Yu, Yingchao and Lyu, Lingjuan and Jin, Yaochu},
  title     = {{OutSafe-Bench}: A Benchmark for Multimodal Offensive Content Detection in Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1965--1975},
}

ProGIC: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization
Hao Cao,
Chengbin Liang,
Wenqi Guo,
Zhijin Qin,
Jungong Han
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Hao and Liang, Chengbin and Guo, Wenqi and Qin, Zhijin and Han, Jungong},
  title     = {{ProGIC}: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2915--2925},
}

In2CLR: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection
Yunxuan Li,
Bohao Liu,
Yanxia Wu,
Rongsheng Li
[pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yunxuan and Liu, Bohao and Wu, Yanxia and Li, Rongsheng},
  title     = {{In2CLR}: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2398--2407},
}

Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator
Xiankang He,
Dongyan Guo,
Hongji Li,
Ying Cui,
Libo Weng,
Ruibo Li,
Chi Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xiankang and Guo, Dongyan and Li, Hongji and Cui, Ying and Weng, Libo and Li, Ruibo and Zhang, Chi},
  title     = {{Distill Any Depth}: Distillation Creates a Stronger Monocular Depth Estimator},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {591--601},
}

PlanGS: Active 3D Gaussian Reconstruction with Real-Time Planning
Wenxiang Xie,
Anpei Chen,
Haoming Yu,
Yujun Shen,
Weiwei Xu
[pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Wenxiang and Chen, Anpei and Yu, Haoming and Shen, Yujun and Xu, Weiwei},
  title     = {{PlanGS}: Active {3D} {Gaussian} Reconstruction with Real-Time Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3156--3166},
}

Image Classification Using CNN-QNN Hybrid Model with Optimized Correlated Features
Minseo Seong,
Youngwook Kim
[pdf]
[bibtex]
@InProceedings{Seong_2026_CVPR,
  author    = {Seong, Minseo and Kim, Youngwook},
  title     = {Image Classification Using {CNN-QNN} Hybrid Model with Optimized Correlated Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2473--2482},
}

Name That Part: 3D Part Segmentation and Naming
Soumava Paul,
Prakhar Kaushik,
Ankit Vaidya,
Anand Bhattad,
Alan Yuille
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paul_2026_CVPR,
  author    = {Paul, Soumava and Kaushik, Prakhar and Vaidya, Ankit and Bhattad, Anand and Yuille, Alan},
  title     = {Name That Part: {3D} Part Segmentation and Naming},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1808--1817},
}

FinChart-Multimodal: A Dataset for Context-Injected Financial Chart Understanding with Aligned OHLCV Time Series
Devansh Garg
[pdf]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Devansh},
  title     = {{FinChart-Multimodal}: A Dataset for Context-Injected Financial Chart Understanding with Aligned {OHLCV} Time Series},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1934--1942},
}

Intelligent Photo Retouching with Language Model-Based Artist Agents
Haoyu Chen,
Keda Tao,
YiZao Wang,
Xinlei Wang,
Lei Zhu,
Jinjin Gu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haoyu and Tao, Keda and Wang, YiZao and Wang, Xinlei and Zhu, Lei and Gu, Jinjin},
  title     = {Intelligent Photo Retouching with Language Model-Based Artist Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1240--1251},
}

A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action
Kaidong Zhang,
Jian Zhang,
Rongtao Xu,
Yu Sun,
Youpeng Wen,
Shuoshuo Xue,
Xiaoyu Guo,
Minghao Guo,
Weijia Liufu,
Liu Zihou,
Kangyi Ji,
Zihang Li,
Ruiyi Chen,
Meng Cao,
Jingming Zhang,
Shen Zhao,
Xiaojun Chang,
Feng Zheng,
Ivan Laptev,
Xiaodan Liang
[pdf]
[bibtex]
% NOTE(review): the author stored as Zihou, Liu may be in reversed order (family name Liu); left unchanged - confirm against the paper.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaidong and Zhang, Jian and Xu, Rongtao and Sun, Yu and Wen, Youpeng and Xue, Shuoshuo and Guo, Xiaoyu and Guo, Minghao and Liufu, Weijia and Zihou, Liu and Ji, Kangyi and Li, Zihang and Chen, Ruiyi and Cao, Meng and Zhang, Jingming and Zhao, Shen and Chang, Xiaojun and Zheng, Feng and Laptev, Ivan and Liang, Xiaodan},
  title     = {{A1}: Adaptive Truncated Vision-Language-Action Model from Affordance to Action},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1503--1514},
}

3DFA: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization
Sizhe Song,
Yankuan Chi,
Shuhan Zhong,
S.-H. Gary Chan
[pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Sizhe and Chi, Yankuan and Zhong, Shuhan and Chan, S.-H. Gary},
  title     = {{3DFA}: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {360--369},
}

Dyna-ViT: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers
Syeda Fiza Rubab,
Arslan Abdul Ghaffar,
Malik Junaid Jami Gul,
Sheriff Murtala,
Ingyu Lee,
Gyu Sang Choi
[pdf] [supp]
[bibtex]
@InProceedings{Rubab_2026_CVPR,
  author    = {Rubab, Syeda Fiza and Ghaffar, Arslan Abdul and Gul, Malik Junaid Jami and Murtala, Sheriff and Lee, Ingyu and Choi, Gyu Sang},
  title     = {{Dyna-ViT}: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2844--2851},
}

Phantasia: Context-Adaptive Backdoors in Vision Language Models
Nam Duong Tran,
Phi Le Nguyen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Nam Duong and Le Nguyen, Phi},
  title     = {{Phantasia}: Context-Adaptive Backdoors in Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {695--704},
}

DR-DPO: Dual-Regularized DPO for Efficient Dataset Condensation
Haiduo Huang,
Jiangcheng Song,
Yadong Zhang,
Guansu Wang,
Pengju Ren
[pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Wang, Guansu and Ren, Pengju},
  title     = {{DR-DPO}: Dual-Regularized {DPO} for Efficient Dataset Condensation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2111--2120},
}

Efficient Document Parsing via Parallel Token Prediction
Lei Li,
Ze Zhao,
Meng Li,
Zhongwang Lun,
Yi Yuan,
Xingjing Lu,
Zheng Wei,
Jiang Bian,
Zang Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lei and Zhao, Ze and Li, Meng and Lun, Zhongwang and Yuan, Yi and Lu, Xingjing and Wei, Zheng and Bian, Jiang and Li, Zang},
  title     = {Efficient Document Parsing via Parallel Token Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2763--2772},
}

Texture-Guided Multiscale Cross-Modal Fusion for AI-Generated Image Quality Assessment
Qinlin Hu,
Mingliang Zhou,
Xingran Liao
[pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Qinlin and Zhou, Mingliang and Liao, Xingran},
  title     = {Texture-Guided Multiscale Cross-Modal Fusion for {AI-Generated} Image Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2629--2638},
}

NSGuard: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation
Lizhi Xiong,
Jianguo Feng,
Ziqiang Li,
Jun Li,
Weiwei Jiang,
Zhangjie Fu
[pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Lizhi and Feng, Jianguo and Li, Ziqiang and Li, Jun and Jiang, Weiwei and Fu, Zhangjie},
  title     = {{NSGuard}: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {675--684},
}

4D E-SloMo: 4D Reconstruction for High Speed Scene using a Hybrid RGB-Event Multi-View System
Bo Xu,
Jun Dai,
Yutian Chen,
Linning Xu,
Mulin Yu,
Yujin Wang,
Shi Guo,
Xinyi Le,
Tianfan Xue
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Bo and Dai, Jun and Chen, Yutian and Xu, Linning and Yu, Mulin and Wang, Yujin and Guo, Shi and Le, Xinyi and Xue, Tianfan},
  title     = {{4D E-SloMo}: {4D} Reconstruction for High Speed Scene using a Hybrid {RGB-Event} Multi-View System},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {43--53},
}

ReaAct: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization
Yanzhao Yu,
Yi Ding,
Peijun Tang,
Haotian Yang,
Xianbiao Qi,
Jianan Wang,
Xueqian Wang
[pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yanzhao and Ding, Yi and Tang, Peijun and Yang, Haotian and Qi, Xianbiao and Wang, Jianan and Wang, Xueqian},
  title     = {{ReaAct}: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1441--1451},
}

Generalizable Human Gaussian Splatting via Multi-view Semantic Consistency
Jingi Kim,
Wonjun Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jingi and Kim, Wonjun},
  title     = {Generalizable Human {Gaussian} Splatting via Multi-view Semantic Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {139--148},
}

Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity 3D Reconstruction in Autonomous Driving
Mingbo Dai,
Han Yan,
Bolun Zhang,
Wu Ran,
Chao Ma
[pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Mingbo and Yan, Han and Zhang, Bolun and Ran, Wu and Ma, Chao},
  title     = {Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity {3D} Reconstruction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {181--190},
}

MolRecBench-Wild: A Real-World Benchmark for Optical Chemical Structure Recognition
Haote Yang,
Hui Wang,
Chen Zhu,
Jingchao Wang,
Linye Li,
Hongbin Lai,
Huijie Ao,
Yongxuan Lv,
Jiang Wu,
Jiaxing Sun,
Lua Chen,
Yuanyuan Cao,
Ruijie Zhang,
Shengxin Lu,
Lijun Wu,
Bin Wang,
Conghui He
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haote and Wang, Hui and Zhu, Chen and Wang, Jingchao and Li, Linye and Lai, Hongbin and Ao, Huijie and Lv, Yongxuan and Wu, Jiang and Sun, Jiaxing and Chen, Lua and Cao, Yuanyuan and Zhang, Ruijie and Lu, Shengxin and Wu, Lijun and Wang, Bin and He, Conghui},
  title     = {{MolRecBench-Wild}: A Real-World Benchmark for Optical Chemical Structure Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1924--1933},
}

Latent Domain Modeling Improves Robustness to Geographic Shifts
Ruth Crasto,
Esther Rolf
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Crasto_2026_CVPR,
  author    = {Crasto, Ruth and Rolf, Esther},
  title     = {Latent Domain Modeling Improves Robustness to Geographic Shifts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2419--2428},
}

MADrive: Memory-Augmented Driving Scene Modeling
Polina Karpikova,
Daniil Selikhanovych,
Kirill Struminsky,
Ruslan Musaev,
Maria Golitsyna,
Dmitry Baranchuk
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karpikova_2026_CVPR,
  author    = {Karpikova, Polina and Selikhanovych, Daniil and Struminsky, Kirill and Musaev, Ruslan and Golitsyna, Maria and Baranchuk, Dmitry},
  title     = {{MADrive}: Memory-Augmented Driving Scene Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {54--65},
}

INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models
Parsa Madinei,
Ryan Solgi,
Ziqi Wen,
Jonathan Skaza,
Miguel Eckstein,
Ramtin Pedarsani
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Madinei_2026_CVPR,
  author    = {Madinei, Parsa and Solgi, Ryan and Wen, Ziqi and Skaza, Jonathan and Eckstein, Miguel and Pedarsani, Ramtin},
  title     = {{INTERLACE}: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2947--2956},
}

Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning
Kyle Domico,
Jean-Charles Noirot Ferrand,
Ryan Sheatsley,
Eric Pauley,
Josiah Hanna,
Patrick McDaniel
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Domico_2026_CVPR,
  author    = {Domico, Kyle and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Pauley, Eric and Hanna, Josiah and McDaniel, Patrick},
  title     = {Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {647--655},
}

3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction
Beiyuan Zhang,
Hesong Li,
Ruiwen Shao,
Ying Fu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Beiyuan and Li, Hesong and Shao, Ruiwen and Fu, Ying},
  title     = {{3D} {Gaussian} Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {306--315},
}

Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework
Yilin Wang,
Dawei Luo,
Shuai Chen,
Feng Xu,
Jiachi Wang,
Zunlei Feng,
Yijun Bei
[pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yilin and Luo, Dawei and Chen, Shuai and Xu, Feng and Wang, Jiachi and Feng, Zunlei and Bei, Yijun},
  title     = {Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2314--2323},
}

Enriching Knowledge Distillation with Cross-Modal Teacher Fusion
Amir M. Mansourian,
Amir Mohammad Babaei,
Shohreh Kasaei
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mansourian_2026_CVPR,
  author    = {Mansourian, Amir M. and Babaei, Amir Mohammad and Kasaei, Shohreh},
  title     = {Enriching Knowledge Distillation with Cross-Modal Teacher Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2893--2903},
}

Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection
Yan Li,
Zhouchao Fu,
Wenbin Lu,
Junjie Zheng,
Junnan Xu,
Junjie Liao,
Jianwei Zheng
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Fu, Zhouchao and Lu, Wenbin and Zheng, Junjie and Xu, Junnan and Liao, Junjie and Zheng, Jianwei},
  title     = {{Riemannian} Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1546--1555},
}

MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training
Zhenhan Yin,
Xuanhan Wang,
Jiahao Jiang,
Kaiyuan Deng,
Pengqi Chen,
Shuangle Li,
Chong Liu,
Xing Xu,
Jingkuan Song,
Lianli Gao,
Heng Tao Shen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Zhenhan and Wang, Xuanhan and Jiang, Jiahao and Deng, Kaiyuan and Chen, Pengqi and Li, Shuangle and Liu, Chong and Xu, Xing and Song, Jingkuan and Gao, Lianli and Shen, Heng Tao},
  title     = {{MiVLA}: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1535--1545},
}

3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework
Tobias Sautter,
Jan-Niklas Dihlmann,
Hendrik P. A. Lensch
[pdf] [arXiv]
[bibtex]
@InProceedings{Sautter_2026_CVPR,
  author    = {Sautter, Tobias and Dihlmann, Jan-Niklas and Lensch, Hendrik P. A.},
  title     = {{3D-RE-GEN}: {3D} Reconstruction of Indoor Scenes with a Generative Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {528--537},
}

GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes
Beibei Lin,
Xiao Cao,
Jingyuan Guo,
Robby T. Tan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Beibei and Cao, Xiao and Guo, Jingyuan and Tan, Robby T.},
  title     = {{GlowGS}: Generative Semantic Feature Learning for {3D} {Gaussian} Splatting in Nighttime Glow Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {275--284},
}

Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving
Zhexi Lian,
Haoran Wang,
Xuerun Yan,
Weimeng Lin,
Xianhong Zhang,
Yongyu Chen,
Jia Hu
[pdf] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Zhexi and Wang, Haoran and Yan, Xuerun and Lin, Weimeng and Zhang, Xianhong and Chen, Yongyu and Hu, Jia},
  title     = {Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {920--930},
}

From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation
Rafael Pablos Sarabia,
Joachim Nyborg,
Morten Birk,
Ira Assent
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarabia_2026_CVPR,
  author    = {Sarabia, Rafael Pablos and Nyborg, Joachim and Birk, Morten and Assent, Ira},
  title     = {From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2606--2617},
}

Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use
Ding Yi,
Yanzhao Yu,
Xili Dai,
Xianbiao Qi,
Peiwen Sun,
Xueqian Wang,
Xiangyu Yue,
Jianan Wang
[pdf] [supp]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ding and Yu, Yanzhao and Dai, Xili and Qi, Xianbiao and Sun, Peiwen and Wang, Xueqian and Yue, Xiangyu and Wang, Jianan},
  title     = {Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1346--1357},
}

EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks
Lulin Liu,
Dayou Li,
Yiqing Liang,
Sicong Jiang,
Hitesh Vijay,
Hezhen Hu,
Xuhai Xu,
Zirui Liu,
Srinivas Shakkottai,
Manling Li,
Zhiwen Fan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lulin and Li, Dayou and Liang, Yiqing and Jiang, Sicong and Vijay, Hitesh and Hu, Hezhen and Xu, Xuhai and Liu, Zirui and Shakkottai, Srinivas and Li, Manling and Fan, Zhiwen},
  title     = {{EgoTL}: Egocentric Think-Aloud Chains for Long-Horizon Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2017--2027},
}

Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling
Pengfei Yang,
Sifu Luo,
Feng Wu,
Fan Zhou,
Ting Zhong
[pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Pengfei and Luo, Sifu and Wu, Feng and Zhou, Fan and Zhong, Ting},
  title     = {Variable-View Diffusion with Geometric Uncertainty Unlocks {LiDAR} Upsampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1148--1158},
}

VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving
Levente Tempfli,
Esteban Rivera,
Markus Lienkamp
[pdf] [supp]
[bibtex]
@InProceedings{Tempfli_2026_CVPR,
  author    = {Tempfli, Levente and Rivera, Esteban and Lienkamp, Markus},
  title     = {{VESPA}: Open-World Auto-Labeling for {3D} Object Detection in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {960--969},
}

Event-Based Optical Flow Leveraging Precise Event Timing
Hugh Greatorex,
Elisabetta Chicca
[pdf] [supp]
[bibtex]
@InProceedings{Greatorex_2026_CVPR,
  author    = {Greatorex, Hugh and Chicca, Elisabetta},
  title     = {Event-Based Optical Flow Leveraging Precise Event Timing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3178--3188},
}

Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering
Noah Frahm,
Prakrut Patel,
Yue Zhang,
Shoubin Yu,
Mohit Bansal,
Roni Sengupta
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Frahm_2026_CVPR,
  author    = {Frahm, Noah and Patel, Prakrut and Zhang, Yue and Yu, Shoubin and Bansal, Mohit and Sengupta, Roni},
  title     = {Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3114--3123},
}

MPM: Mutual Pair Merging for Efficient Vision Transformers
Simon Ravé,
Pejman Rasti,
David Rousseau
[pdf] [supp]
[bibtex]
@InProceedings{Rave_2026_CVPR,
  author    = {Rav{\'e}, Simon and Rasti, Pejman and Rousseau, David},
  title     = {{MPM}: Mutual Pair Merging for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2998--3008},
}

Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior
Jiaying Lin,
Shuquan Ye,
Dan Xu,
Wanli Ouyang,
Rynson W. H. Lau
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiaying and Ye, Shuquan and Xu, Dan and Ouyang, Wanli and Lau, Rynson W. H.},
  title     = {Do {MLLMs} Exhibit Human-like Perceptual Behaviors? {HVSBench}: A Benchmark for {MLLM} Alignment with Human Perceptual Behavior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1818--1827},
}

AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning
Qilin Xiang,
Qilin Fan,
Xinrui Li,
Tianfu Wang,
Shuting Qiu,
Yue Niu
[pdf]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Qilin and Fan, Qilin and Li, Xinrui and Wang, Tianfu and Qiu, Shuting and Niu, Yue},
  title     = {{AlignFL}: Adaptive Learning and Intelligent Generation of Networks for Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3019--3028},
}

InstructTable: Improving Table Structure Recognition Through Instruction
Boming Chen,
Zining Wang,
Zhentao Guo,
Jianqiang Liu,
Chen Duan,
Yu Gu,
Kai Zhou,
Pengfei Yan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Boming and Wang, Zining and Guo, Zhentao and Liu, Jianqiang and Duan, Chen and Gu, Yu and Zhou, Kai and Yan, Pengfei},
  title     = {{InstructTable}: Improving Table Structure Recognition Through Instruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2742--2752},
}

AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models
Jintao Lin,
Bowen Dong,
Weikang Shi,
Chenyang Lei,
Suiyun Zhang,
Rui Liu,
Xihui Liu
[pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jintao and Dong, Bowen and Shi, Weikang and Lei, Chenyang and Zhang, Suiyun and Liu, Rui and Liu, Xihui},
  title     = {{AEGIS}: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1797--1807},
}

LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets
Woo-Jin Jung,
Dong-Hee Paek,
Seung-Hyun Kong
[pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Woo-Jin and Paek, Dong-Hee and Kong, Seung-Hyun},
  title     = {{LiDAR}-to-{4D} Radar Synthesis for Building Large-Scale Tensor Datasets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {889--899},
}

DINO-VO: Learning Where to Focus for Enhanced State Estimation
Qi Chen,
Guanghao Li,
Sijia Hu,
Xin Gao,
Junpeng Ma,
Xiangyang Xue,
Jian Pu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Qi and Li, Guanghao and Hu, Sijia and Gao, Xin and Ma, Junpeng and Xue, Xiangyang and Pu, Jian},
  title     = {{DINO-VO}: Learning Where to Focus for Enhanced State Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1556--1566},
}

GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis
Thomas Tanay,
Mohammed Brahimi,
Michal Nazarczuk,
Qingwen Zhang,
Sibi Catley-Chandar,
Arthur Moreau,
Zhensong Zhang,
Eduardo Pérez-Pellitero
[pdf] [arXiv]
[bibtex]
@InProceedings{Tanay_2026_CVPR,
  author    = {Tanay, Thomas and Brahimi, Mohammed and Nazarczuk, Michal and Zhang, Qingwen and Catley-Chandar, Sibi and Moreau, Arthur and Zhang, Zhensong and P{\'e}rez-Pellitero, Eduardo},
  title     = {{GRVS}: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {348--359},
}

The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning
Garima Arya Yadav,
Nilay Yilmaz,
Yezhou Yang
[pdf] [supp]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Garima Arya and Yilmaz, Nilay and Yang, Yezhou},
  title     = {The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2069--2078},
}

SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction
Kang Han,
Wei Xiang,
Lu Yu,
Mathew Wyatt,
Gaowen Liu,
Ramana Rao Kompella
[pdf] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Kang and Xiang, Wei and Yu, Lu and Wyatt, Mathew and Liu, Gaowen and Kompella, Ramana Rao},
  title     = {{SwiftNDC}: Fast Neural Depth Correction for High-Fidelity {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {12--21},
}

SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration
Zhimin Shao,
Abhay Yadav,
Rama Chellappa,
Cheng Peng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Zhimin and Yadav, Abhay and Chellappa, Rama and Peng, Cheng},
  title     = {{SPIDER}: Spatial Image {CorresponDence} Estimator for Robust Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {253--263},
}

Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data
Dumindu Tissera,
Omar Awadallah,
Muhammad Umair Danish,
Ayan Sadhu,
Katarina Grolinger
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tissera_2026_CVPR,
  author    = {Tissera, Dumindu and Awadallah, Omar and Danish, Muhammad Umair and Sadhu, Ayan and Grolinger, Katarina},
  title     = {Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2429--2439},
}

ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph
Junhao Cai,
Deyu Zeng,
Junhao Pang,
Lini Li,
Xiaopin Zhong,
Zongze Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Junhao and Zeng, Deyu and Pang, Junhao and Li, Lini and Zhong, Xiaopin and Wu, Zongze},
  title     = {{ForgeDreamer}: Industrial Text-to-{3D} Generation with Multi-Expert {LoRA} and Cross-View Hypergraph},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {295--305},
}

UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics
Joseph Raj Vishal,
Nagasiri Poluri,
Katha Naik,
Rutuja Patil,
Kashyap Hegde Kota,
Krishna Vinod,
Prithvi Jai Ramesh,
Mohammad Farhadi,
Yezhou Yang,
Bharatesh Chakravarthi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vishal_2026_CVPR,
  author    = {Vishal, Joseph Raj and Poluri, Nagasiri and Naik, Katha and Patil, Rutuja and Kota, Kashyap Hegde and Vinod, Krishna and Ramesh, Prithvi Jai and Farhadi, Mohammad and Yang, Yezhou and Chakravarthi, Bharatesh},
  title     = {{UDVideoQA}: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1862--1871},
}

MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments
Zhiyu Huang,
Zewei Zhou,
Tianhui Cai,
Yun Zhang,
Jiaqi Ma
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhiyu and Zhou, Zewei and Cai, Tianhui and Zhang, Yun and Ma, Jiaqi},
  title     = {{MDG}: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {878--888},
}

MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs
Zhilin Lin,
Zhihui Zhang,
Shiliang Sun,
Jing Zhao,
Hao Yang
[pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zhilin and Zhang, Zhihui and Sun, Shiliang and Zhao, Jing and Yang, Hao},
  title     = {{MathAll}: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in {Omni-MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2089--2099},
}

Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM
Cabrel Wouladje,
Golden Tendekai Mumanikidzwa,
Md Apon Islam,
Huiying Xu,
Hongbo Li,
Wenzhe Tan,
Zhendong Chen,
Xinzhong Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Wouladje_2026_CVPR,
  author    = {Wouladje, Cabrel and Mumanikidzwa, Golden Tendekai and Islam, Md Apon and Xu, Huiying and Li, Hongbo and Tan, Wenzhe and Chen, Zhendong and Zhu, Xinzhong},
  title     = {Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1567--1576},
}

Instant Colorization of Gaussian Splats
Daniel Lieber,
Alexander Mock,
Nils Wandel
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lieber_2026_CVPR,
  author    = {Lieber, Daniel and Mock, Alexander and Wandel, Nils},
  title     = {Instant Colorization of {Gaussian} Splats},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {170--180},
}

Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning
Yibing Wei,
Sudeep Katakol,
Manuel Brack,
Jinhong Lin,
Haoyue Bai,
Yu-Teng Li,
Richard Zhang,
Eli Shechtman,
Hareesh Ravi,
Ajinkya Kale
[pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yibing and Katakol, Sudeep and Brack, Manuel and Lin, Jinhong and Bai, Haoyue and Li, Yu-Teng and Zhang, Richard and Shechtman, Eli and Ravi, Hareesh and Kale, Ajinkya},
  title     = {Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1883--1892},
}

Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment
Huaiyuan Qin,
Muli Yang,
Gabriel James Goenawan,
Kai Wang,
Zheng Wang,
Peng Hu,
Xi Peng,
Hongyuan Zhu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Huaiyuan and Yang, Muli and Goenawan, Gabriel James and Wang, Kai and Wang, Zheng and Hu, Peng and Peng, Xi and Zhu, Hongyuan},
  title     = {Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3029--3039},
}

Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams
Zhenghui Guo,
Yuanbin Man,
Junyuan Sheng,
Bowen Lin,
Ahmed Ahmed,
Bo Jiang,
Boyuan Zhang,
Miao Yin,
Sian Jin,
Omprakash Gnawali,
Chengming Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Zhenghui and Man, Yuanbin and Sheng, Junyuan and Lin, Bowen and Ahmed, Ahmed and Jiang, Bo and Zhang, Boyuan and Yin, Miao and Jin, Sian and Gnawali, Omprakash and Zhang, Chengming},
  title     = {{Event-VStream}: Event-Driven Real-Time Understanding for Long Video Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3060--3069},
}

LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images
Yilong Liu,
Wanhua Li,
Chen Zhu-Tian,
Hanspeter Pfister
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yilong and Li, Wanhua and Chen, Zhu-Tian and Pfister, Hanspeter},
  title     = {{LangFlash}: Feed-forward {3D} Language {Gaussian} Splatting from Sparse Unposed Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {191--201},
}

World Model Robustness via Surprise Recognition
Geigh Zollicoffer,
Tanush Chopra,
Mingkuan Yan,
Xiaoxu Ma,
Kenneth Eaton,
Mark Riedl
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zollicoffer_2026_CVPR,
  author    = {Zollicoffer, Geigh and Chopra, Tanush and Yan, Mingkuan and Ma, Xiaoxu and Eaton, Kenneth and Riedl, Mark},
  title     = {World Model Robustness via Surprise Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3146--3155},
}

U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration
Michael Smith,
Frank P. Ferrie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Smith_2026_CVPR,
  author    = {Smith, Michael and Ferrie, Frank P.},
  title     = {{U-SEG}: Uncertainty in {SEGmentation} - A systematic multi-variable exploration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1850--1861},
}

Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety
Younggun Kim,
Sirnam Swetha,
Fazil Kagdi,
Mubarak Shah
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Younggun and Swetha, Sirnam and Kagdi, Fazil and Shah, Mubarak},
  title     = {{Safe-LLaVA}: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2100--2110},
}

PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models
Jinkai Li,
Zhenliang Zhang,
Lifeng Fan,
Wei Wang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinkai and Zhang, Zhenliang and Fan, Lifeng and Wang, Wei},
  title     = {{PureSpace}: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1976--1985},
}

LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers
Md Abtahi Majeed Chowdhury,
Md Rifat Ur Rahman,
Akil Ahmad Taki
[pdf] [supp]
[bibtex]
@InProceedings{Chowdhury_2026_CVPR,
  author    = {Chowdhury, Md Abtahi Majeed and Rahman, Md Rifat Ur and Taki, Akil Ahmad},
  title     = {{LOOPE}: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1640--1649},
}

Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression
Haihao Zhang,
Siwei Dong,
Jianing Li,
Rui Zhao,
Yunjian Zhang,
Geng Qin,
Lin Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haihao and Dong, Siwei and Li, Jianing and Zhao, Rui and Zhang, Yunjian and Qin, Geng and Zhu, Lin},
  title     = {Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3220--3230},
}

SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense
Patryk Krukowski,
Lukasz Gorczyca,
Piotr Helm,
Kamil Ksiazek,
Przemyslaw Spurek
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krukowski_2026_CVPR,
  author    = {Krukowski, Patryk and Gorczyca, Lukasz and Helm, Piotr and Ksiazek, Kamil and Spurek, Przemyslaw},
  title     = {{SHIELD}: Secure Hypernetworks for Incremental Expansion Learning Defense},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2377--2386},
}

MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection
Xinying Li,
Junfeng Jing,
Tong Wu,
Tian Gao,
Zhihong Sheng
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinying and Jing, Junfeng and Wu, Tong and Gao, Tian and Sheng, Zhihong},
  title     = {{MegAD}: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2585--2595},
}

NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks
Pengcheng Chen,
Yue Hu,
Wenhao Li,
Nicole M Gunderson,
Andrew Feng,
Zhenglong Sun,
Peter Beerel,
Eric J Seibel
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengcheng and Hu, Yue and Li, Wenhao and Gunderson, Nicole M and Feng, Andrew and Sun, Zhenglong and Beerel, Peter and Seibel, Eric J},
  title     = {{NeVStereo}: A {NeRF}-Driven {NVS}-Stereo Architecture for High-Fidelity {3D} Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {403--413},
}

FLToM: Robust Federated Learning with Theory-of-Mind Structure
Tianshu Xiao,
Liu Yang,
Sichang Guo,
Qilong Wang,
Qinghua Hu
[pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Tianshu and Yang, Liu and Guo, Sichang and Wang, Qilong and Hu, Qinghua},
  title     = {{FLToM}: Robust Federated Learning with Theory-of-Mind Structure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2503--2513},
}

RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes
Michael Baltaxe,
Dan Levi,
Sagie Benaim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baltaxe_2026_CVPR,
  author    = {Baltaxe, Michael and Levi, Dan and Benaim, Sagie},
  title     = {{RAD}: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {558--568},
}

G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction
Chao Ning,
Naoto Yokoya
[pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Chao and Yokoya, Naoto},
  title     = {{G2I}: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {518--527},
}

Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents
Nishad Singhi,
Christian Bialas,
Snehal Jauhri,
Vignesh Prasad,
Georgia Chalvatzaki,
Marcus Rohrbach,
Anna Rohrbach
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singhi_2026_CVPR,
  author    = {Singhi, Nishad and Bialas, Christian and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia and Rohrbach, Marcus and Rohrbach, Anna},
  title     = {Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3124--3135},
}

Back