Findings
- Back
Spatial Transcriptomics as Images for Large-Scale Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yishun and Qi, Jiaxin and Wang, Jian and Zheng, Yuhua and Huang, Jianqiang},
  title     = {Spatial Transcriptomics as Images for Large-Scale Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1191--1200},
}
GEAR: GEometry-Motion Alternating Refinement for Articulated Object Modeling with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jialin and Fu, Bin and Wang, Ruiping and Chen, Xilin},
  title     = {{GEAR}: {GEometry-Motion} Alternating Refinement for Articulated Object Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {264--274},
}
Mix-to-Max: Optimizing Data Mixtures for Peak Vision-Language Efficiency-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Erwei and Zeng, Haijin and Xiao, Weiwei and Cao, Shijie and Shan, Qiben and Wu, Shaocong and Su, Jingyong and Liu, Jie},
  title     = {{Mix-to-Max}: Optimizing Data Mixtures for Peak Vision-Language Efficiency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2937--2946},
}
AdaPerceiver: Transformers with Adaptive Width, Depth, and Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jajal_2026_CVPR,
  author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K. and Lu, Yung-Hsiang and Davis, James C.},
  title     = {{AdaPerceiver}: Transformers with Adaptive Width, Depth, and Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2618--2628},
}
Eigen-Value: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Youngjun and Kang, Joonseong and Lim, Sungjun and Song, Kyungwoo},
  title     = {{Eigen-Value}: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2387--2397},
}
CoTFly: Making UAVs Think Where to Fly Next Through Visual Chain-of-Thought Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meiqi and Xu, Longnyu and Liu, Jun and Li, Hewu and Qiu, Han},
  title     = {{CoTFly}: Making {UAVs} Think Where to Fly Next Through Visual Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1482--1491},
}
AD-R1: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Tianyi and Tang, Tao and Gui, Xingtai and Li, Yongkang and Zheng, Jiasen and Huang, Weiyao and Kong, Lingdong and Han, Wencheng and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{AD-R1}: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1085--1095},
}
What Matters for Scalable and Robust Learning in End-to-End Driving Planners?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Holtz_2026_CVPR,
  author    = {Holtz, David and Hanselmann, Niklas and Doll, Simon and Cordts, Marius and Schiele, Bernt},
  title     = {What Matters for Scalable and Robust Learning in End-to-End Driving Planners?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {931--941},
}
Active Exploration for Sparse Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Lidholm_2026_CVPR,
  author    = {Lidholm, Johanna and Dill{\'e}n, Ludvig and Kukelova, Zuzana and Sattler, Torsten and Larsson, Viktor},
  title     = {Active Exploration for Sparse Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {338--347},
}
IDSplat: Instance-Decomposed 3D Gaussian Splatting for Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Lindstrom_2026_CVPR,
  author    = {Lindstr{\"o}m, Carl and Rafidashti, Mahan and Fatemi, Maryam and Hammarstrand, Lars and Oswald, Martin R. and Svensson, Lennart},
  title     = {{IDSplat}: Instance-Decomposed {3D} {Gaussian} Splatting for Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {316--326},
}
MapGPT: A Vision-Language Model for Large-Scale High-Definition Map Generation-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mengxi and Zhou, Long and Li, Zhixia and Kwan, Adrian and Laprise, Denis and Huang, Hengyi and Wu, Xiaqing and Wu, Shuang},
  title     = {{MapGPT}: A Vision-Language Model for Large-Scale High-Definition Map Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {990--999},
}
SPOT: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schlesinger_2026_CVPR,
  author    = {Schlesinger, Oded and Farzam, Amirhossein and Di Martino, J. Matias and Sapiro, Guillermo},
  title     = {{SPOT}: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2335--2345},
}
CPUBone: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nottebaum_2026_CVPR,
  author    = {Nottebaum, Moritz and Dunnhofer, Matteo and Micheloni, Christian},
  title     = {{CPUBone}: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2493--2502},
}
2D Triangle Splatting for Direct Differentiable Mesh Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Kaifeng and Zhou, Zheng and Peng, Yingliang and Wang, Qianwei},
  title     = {{2D} Triangle Splatting for Direct Differentiable Mesh Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {285--294},
}
DrawingVQA: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Yoonhwa and Fu, Junryu and Golparvar-Fard, Mani},
  title     = {{DrawingVQA}: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2121--2130},
}
Debiased One-Shot NAS Via Density-Aware Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Roshtkhari_2026_CVPR,
  author    = {Roshtkhari, Mehraveh Javan and Toews, Matthew and Pedersoli, Marco},
  title     = {Debiased One-Shot {NAS} Via Density-Aware Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2357--2366},
}
SLAD : Shared LoRA Adapters for Task Specific Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Bensaid_2026_CVPR,
  author    = {Bensaid, Reda and Bendou, Yassir and Gripon, Vincent and Leduc-Primeau, Fran{\c{c}}ois},
  title     = {{SLAD}: Shared {LoRA} Adapters for Task Specific Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2968--2977},
}
OnlineX: Unified Online 3D Reconstruction and Understanding with Active-to-Stable State Evolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Liu, Fangfu and Wang, Yule and Pang, Yize and Duan, Yueqi},
  title     = {{OnlineX}: Unified Online {3D} Reconstruction and Understanding with Active-to-Stable State Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {66--76},
}
AndroidLong: LLM-based Android Agents Struggle with Long Looping Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinghan and Liu, Xiao and Xu, Yifan and Fu, Jiaqi and Huang, Jiayu and Liu, Yixuan and Dong, Yuxiao and Tang, Jie},
  title     = {{AndroidLong}: {LLM-based} {Android} Agents Struggle with Long Looping Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1700--1710},
}
CTFS : Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ping and Li, Chengzhou and Meng, Guanchen and Jia, Qi and Liu, Jinyuan and Liu, Zhu and Liu, Yu and Luo, Zhongxuan and Fan, Xin},
  title     = {{CTFS}: Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1660--1669},
}
Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Shan and Yang, Zhao and Yan, Tianwei and Gong, Yusong and Wan, Qian and Chen, Shizhao and Song, Shezheng and Wang, Chengyu and Wang, Meng},
  title     = {Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2304--2313},
}
KnowMTP: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Rufan and Xue, Tianyi and Zhou, Tiantian and Wu, Weiwei and Li, Changle and Lu, Yuhuan},
  title     = {{KnowMTP}: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {980--989},
}
GRADE: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Zehong and Liu, Zhiyuan and Wang, Yuning and Li, Jinhao and Jiang, Junkai and Jiang, Yanbo and Xu, Zhenhua and Wang, Jianqiang},
  title     = {{GRADE}: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1029--1038},
}
A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Duo and Yang, Zuhao and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2823--2833},
}
PDF-GS: Progressive Distractor Filtering for Robust 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Kangmin and Lee, MinKyu and Kim, Tae-Young and Lee, ByeongCheol and An, JoonSeoung and Heo, Jae-Pil},
  title     = {{PDF-GS}: Progressive Distractor Filtering for Robust {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {468--477},
}
FineCog-Nav: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal UAV Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Dian and Xu, Zhengzheng and Wang, Peiyang and Liu, Like and Wang, Yule and Shi, Jieqi and Huo, Jing},
  title     = {{FineCog-Nav}: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal {UAV} Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1325--1334},
}
Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yu and Sun, Sheng and Cheng, Shengjia and Liu, Teli and Li, Mingfeng and Liu, Min},
  title     = {Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {837--846},
}
Plug-and-Think: Structured Reasoning for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Kaikai and Wen, Di and Li, Xinhai and Xiang, Senwei},
  title     = {{Plug-and-Think}: Structured Reasoning for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3136--3145},
}
Step-CoT: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Lin and Ou, Yafei and Deng, Zhipeng and Dai, Pengyu and Hou, Chongxian and Yan, Jiale and Li, Yaqian and Long, Kaiwen and Gong, Xun and Ikebe, Masayuki and Zheng, Yefeng},
  title     = {{Step-CoT}: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2049--2059},
}
Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akbarian_2026_CVPR,
  author    = {Akbarian, Fatemeh and Baninajjar, Anahita and Zhang, Yingyi and Balashankar, Ananth and Aminifar, Amir},
  title     = {Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {748--757},
}
LTGS: Long-Term Gaussian Scene Chronology From Sparse View Updates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minkwan and Lee, Seungmin and Kim, Junho and Kim, Young Min},
  title     = {{LTGS}: Long-Term {Gaussian} Scene Chronology From Sparse View Updates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {488--497},
}
VideoMatGen: PBR Materials through Joint Generative Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hasselgren_2026_CVPR,
  author    = {Hasselgren, Jon and Hasan, Milos and Zeng, Zheng and Munkberg, Jacob},
  title     = {{VideoMatGen}: {PBR} Materials through Joint Generative Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2440--2450},
}
Environmental Understanding Vision-language Model for Embodied Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2026_CVPR,
  author    = {Bang, Jinsik and Bae, Jaeyeon and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
  title     = {Environmental Understanding Vision-language Model for Embodied Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3092--3102},
}
Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jungkon and Jung, Cheolseung and Choi, Jong-Min and Lee, Juseong},
  title     = {Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {847--856},
}
Learning Vision-Language-Action World Models for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guoqing and Tang, Pin and Ren, Xiangxuan and Zhao, Guodongfang and Feng, Bailan and Ma, Chao},
  title     = {Learning Vision-Language-Action World Models for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1073--1084},
}
Exploring the best way for UAV visual localization under Low-altitude Multi-view Observation Condition: a Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yibin and Teng, Xichao and Chen, Shuo and Liu, Leqi and Wang, Kun and Song, Xiaokai and Li, Zhang},
  title     = {Exploring the best way for {UAV} visual localization under Low-altitude Multi-view Observation Condition: a Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1731--1741},
}
BMD-45: A Large-Scale CCTV Vehicle Detection Dataset for Urban Traffic in Developing Cities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Akash and Mhatre, Chinmay and Gawali, Sankalp and Bokkasam, Ruthvik and Sharma, Brij and Pattanaik, Vishwajeet and Rathore, Punit and Krishnapuram, Raghu and Kovvali, Vijay Gopal and Chakraborty, Anirban and Simmhan, Yogesh},
  title     = {{BMD-45}: A Large-Scale {CCTV} Vehicle Detection Dataset for Urban Traffic in Developing Cities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2240--2249},
}
Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenhu and Wu, Yiming and Wang, Huanyu and Liu, YaoYang and Dou, Huanzhang and Yang, Senqiao and Wu, Sitong and Zhao, Hanbin and Jia, Jiaya},
  title     = {Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2852--2862},
}
Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yanyun and Ye, Qingqing and Liu, Li and Liang, Zi and Hu, Haibo},
  title     = {Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {768--778},
}
Memorization in 3D Shape Generation: An Empirical Study-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pu_2026_CVPR,
  author    = {Pu, Shu and Zeng, Boya and Zhou, Kaichen and Wang, Mengyu and Liu, Zhuang},
  title     = {Memorization in {3D} Shape Generation: An Empirical Study},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1828--1838},
}
Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parolari_2026_CVPR,
  author    = {Parolari, Luca and Faccioli, Nicla and Ballan, Lamberto},
  title     = {Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1742--1751},
}
DaMN: Deleting and Migrating Normalization Layers from Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Ryabykin_2026_CVPR,
  author    = {Ryabykin, Alexey and Zhelavskaya, Irina and Shvetsov, Egor and Rukhovich, Alexey and Okhotnikov, Nikita and Khrapov, Artem and Burnaev, Evgeny and Kryzhanovskiy, Vladimir Mikhailovich},
  title     = {{DaMN}: Deleting and Migrating Normalization Layers from Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2883--2892},
}
Do LLMs and VLMs Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer-
[pdf]
[bibtex]@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Chenhang and Zhang, An and Chen, Yuxin and Deng, Gelei and Zheng, Jingnan and Liang, Zhenkai and Wang, Xiang and Chua, Tat-Seng},
  title     = {Do {LLMs} and {VLMs} Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2346--2356},
}
Re-Depth Anything: Test-Time Depth Refinement via Self-Supervised Re-lighting-
[pdf]
[supp]
[bibtex]@InProceedings{Bhattarai_2026_CVPR,
  author    = {Bhattarai, Ananta R. and Rhodin, Helge},
  title     = {{Re-Depth} {Anything}: Test-Time Depth Refinement via Self-Supervised Re-lighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {602--612},
}
TransKV: A Data-Driven Pruning Method for Large Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangning and Meng, Fanxu and Zhou, Ruijie and Ng, Michael K. and Pei, Wenjie and Zhang, Muhan},
  title     = {{TransKV}: A Data-Driven Pruning Method for Large Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2451--2461},
}
MaMe: Matrix-Based Token Merging-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Simin and Li, Ning},
  title     = {{MaMe}: Matrix-Based Token Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2863--2872},
}
BadRSSD: Backdoor Attacks on Regularized Self-Supervised Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayao and Zhang, Yiping and Hasan, Mohammad Maruf and Lei, Xiaoying and Zhang, Jiale and Zhu, Junwu and Wu, Qilin and Zhao, Dongfang},
  title     = {{BadRSSD}: Backdoor Attacks on Regularized Self-Supervised Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {705--715},
}
Splatwizard: A Benchmark Toolkit for 3D Gaussian Splatting Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiang and Zhou, Yimin and Wang, Jinxiang and Huang, Yujun and Xie, Shuzhao and Qin, Shiyu and Hong, Mingyao and Li, Jiawei and Wang, Yaowei and Wang, Zhi and Xia, Shu-Tao and Chen, Bin},
  title     = {Splatwizard: A Benchmark Toolkit for {3D} {Gaussian} Splatting Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2261--2271},
}
See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Le Thien Phuc and Yu, Zhuoran and Hang, Samuel Low Yu and An, Subin and Lee, Jeongik and Ban, Yohan and Chung, SeungEun and Nguyen, Thanh-Huy and Maeng, JuWan and Lee, Soochahn and Lee, Yong Jae},
  title     = {See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2272--2283},
}
RACE-6D: Real-time Accurate Coarse-to-finE Object 6D Pose Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Yoonwoo and Moon, Hyungpil},
  title     = {{RACE-6D}: Real-time Accurate {Coarse-to-finE} Object {6D} Pose Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1525--1534},
}
GM-Skip: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Lianming and Hu, Haibo and Li, Qiao and He, Xin and Guan, Nan and Xue, Chun Jason},
  title     = {{GM-Skip}: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2834--2843},
}
Res2SPDNet: Multi-Granularity SPD Matrix Residual Learning for Signal Classification-
[pdf]
[bibtex]@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Shenghui and Wang, Rui and Xu, Tianyang and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef},
  title     = {{Res2SPDNet}: Multi-Granularity {SPD} Matrix Residual Learning for Signal Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2639--2648},
}
What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shaobo and Xiong, Haobo and Liu, Kai and Lin, Yuna},
  title     = {What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2813--2822},
}
RoboScape-R: Unified Reward-Observation World Models for Generalizable Robotics Training via RL-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yinzhou and Shang, Yu and Chen, Yinuo and Wei, Bingwen and Zhang, Xin and Yu, Shu'ang and Shi, Liangzhi and Yu, Chao and Gao, Chen and Wu, Wei and Li, Yong},
  title     = {{RoboScape-R}: Unified Reward-Observation World Models for Generalizable Robotics Training via {RL}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1472--1481},
}
IRL-VLA: Vision-Language-Action Training via Reward World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Anqing and Yu, Gao and Yuwen, Heng and Wang, Yiru and Shuo, Wang and Hao, Jiang and Hao, Sun},
  title     = {{IRL-VLA}: Vision-Language-Action Training via Reward World Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {970--979},
}
Softmax-GS: Generalized Gaussians Learning When to Blend or Bound-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Wang, Peng and Tan, Hao and Xu, Zexiang and Fuxin, Li},
  title     = {{Softmax-GS}: Generalized {Gaussians} Learning When to Blend or Bound},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {508--517},
}
On the Feasibility and Opportunity of Autoregressive 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zanming and Yoo, Jinsu and Jeon, Sooyoung and Liu, Zhenzhen and Campbell, Mark and Weinberger, Kilian Q. and Hariharan, Bharath and Chao, Wei-Lun and Luo, Katie Z.},
  title     = {On the Feasibility and Opportunity of Autoregressive {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1170--1179},
}
LUMINA: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiazhou and Liu, Zhongyi and Shi, Ying and Zhao, Zhichun and Wang, Zhuoyu and Zhou, Yuhang and Hu, Huanling and Ye, Guangnan and Li, Mengtian and Guo, Lei},
  title     = {{LUMINA}: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1629--1639},
}
A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stoica_2026_CVPR,
  author    = {Stoica, Maria and Hekal, Abdelrahman and Lomuscio, Alessio},
  title     = {A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {685--694},
}
MFI-ResNet: Efficient ResNet Architecture Optimization via MeanFlow Compression and Selective Incubation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Nuolin and Wang, Linyuan and Wei, Haonan and Li, Lei and Yan, Bin},
  title     = {{MFI-ResNet}: Efficient {ResNet} Architecture Optimization via {MeanFlow} Compression and Selective Incubation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2691--2699},
}
Speed3R: Sparse Feed-forward 3D Reconstruction Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Weining and Tan, Xiao and Han, Kai},
  title     = {{Speed3R}: Sparse Feed-forward {3D} Reconstruction Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {119--128},
}
Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization PatchGAN-
[pdf]
[supp]
[bibtex]@InProceedings{Zaghetto_2026_CVPR,
  author    = {Zaghetto, C. and Purim, A. and Oliveira, W. and Ribeiro, J. R. and Nolla, H. and Santos, F. and Chang, M. and Vareto, R. H.},
  title     = {Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization {PatchGAN}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1221--1229},
}
Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Mukkamala_2026_CVPR, author = {Mukkamala, Hanvitha Saraswathi and Pujari, Arun K}, title = {Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1680-1689} }
GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minsol and Ali, Usman}, title = {GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {33-42} }
AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yue and Wang, Yingyao and Bu, Pi and Xing, Jingxuan and Jiang, Wei and Zhu, Zekun and Ma, Junpeng and Zhou, Sashuai and Lu, Tong and Song, Jun and Cheng, Yu and Jiang, Yuning and Zheng, Bo}, title = {AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1903-1912} }
C^2T: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuyang and Zhao, Kaiyan and Wang, Yiming and Yang, Ming and Rao, Bin and Li, Zhenning}, title = {{C$^2$T}: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1128-1137} }
Generative Event Pretraining with Foundation Model Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Jianwen and Xing, Jiaxu and Messikommer, Nico and Scaramuzza, Davide}, title = {Generative Event Pretraining with Foundation Model Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3189-3199} }
Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goral_2026_CVPR, author = {Goral, Gracjan and Ziarko, Alicja and Milos, Piotr and Nauman, Michal and Wolczyk, Maciej and Kosinski, Michal}, title = {Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1721-1730} }
BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Zhien and Tang, Zhaohui and Zhang, Hu and Pan, Mingjun and Luo, Jin and Xie, Yongfang}, title = {BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1-11} }
FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Tianhao and Jiang, Linlian and Zuo, Xinxin and Wang, Yang and Popa, Tiberiu}, title = {FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {202-212} }
Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification-
[pdf]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Lulu and Qin, Jiaxiang and Yan, Ruiheng and Pan, Ning and Liu, Haihua and Chen, Xinxin}, title = {Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2564-2574} }
HumanOrbit: 3D Human Reconstruction as 360° Orbit Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Suzuki_2026_CVPR, author = {Suzuki, Keito and Chen, Kunyao and Wang, Lei and Du, Bang and Li, Runfa Blark and Liu, Peng and Bi, Ning and Nguyen, Truong}, title = {HumanOrbit: 3D Human Reconstruction as 360{$^\circ$} Orbit Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {624-634} }
Channel Correlation Loss for Binary Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Xindi and Zhang, Wei and Yu, Hai and Zhu, Zhiliang}, title = {Channel Correlation Loss for Binary Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2575-2584} }
JACoP: Joint Alignment for Compliant Multi-Agent Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qingze Tony and Mrdovic, Alen and Li, Danrui and Schwartz, Mathew and Yoon, Sejong and Kapadia, Mubbasir}, title = {JACoP: Joint Alignment for Compliant Multi-Agent Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {910-919} }
Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Songlong and Wang, Weijie and Zhao, Zhengyu and Gu, Jindong and Torr, Philip and Sebe, Nicu}, title = {Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {737-747} }
Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Hu and Long, Chengjiang and Zhang, Jiqing and Jiang, Chuanlu and Ge, Huilin and Yin, Erwei and Yin, Baocai and Yang, Xin}, title = {Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1335-1345} }
Revisiting Articulated Parts Perception in Robot Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiaoqian and Guo, Yejie and Chen, Xiaoyang and Yang, Lixin and Lu, Cewu and Li, Yong-Lu}, title = {Revisiting Articulated Parts Perception in Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1368-1377} }
Vision Language Models are Confused Tourists-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Irawan_2026_CVPR, author = {Irawan, Patrick Amadeus and Hanif, Ikhlasul Akmal and Al Kautsar, Muhammad Dehan and Winata, Genta Indra and Koto, Fajri and Aji, Alham Fikri}, title = {Vision Language Models are Confused Tourists}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1763-1773} }
Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tianle and Chakka, Chaitanya and Akula, Arjun Reddy and Thomas, Xavier and Ghadiyaram, Deepti}, title = {Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2142-2151} }
Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Biscarrat_2026_CVPR, author = {Biscarrat, Camille and Gharbi, Micha{\"e}l and Goel, Rahul and Ragan-Kelley, Jonathan and Durand, Fr{\'e}do and Li, Tzu-Mao}, title = {Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1283-1293} }
From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Haoming and Liu, Jinnuo and Li, Yanhao and Bai, Liuyang and Ji, Yunkai and Guo, Yuanhe and Wan, Shenji and Wen, Hongyi}, title = {From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2649-2658} }
HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patro_2026_CVPR, author = {Patro, Badri N and Agneeswaran, Vijay S}, title = {HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2408-2418} }
Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Xiaobin and Diao, Changyu and Li, Min and Yu, Ruohan and Xu, Duanqing}, title = {Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {223-232} }
Rich Feature Learning via Diversification-
[pdf]
[supp]
[bibtex]@InProceedings{Leng_2026_CVPR, author = {Leng, Xi and Chen, Yongqiang and Tang, Xiaoying and Bian, Yatao}, title = {Rich Feature Learning via Diversification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2462-2472} }
SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yonghan and Huang, Tsung-Wei and Gehlot, Shiv and Choi, Jaehoon and Su, Guan-Ming and Manocha, Dinesh}, title = {SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {77-87} }
When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Hossain_2026_CVPR, author = {Hossain, Md Zarif and Fime, Awal Ahmed and Imteaj, Ahmed}, title = {When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {758-767} }
Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yizhao and Zhu, Hongyuan and Liu, Caiyun and Wang, Tianfu and Chen, Keyu and Xu, Sicheng and Yang, Jiaolong and Yuan, Nicholas Jing and Zhang, Qi}, title = {Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {635-646} }
BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dang_2026_CVPR, author = {Dang, Ba Luan and Truong, Vu Tuan and Le, Long Bao}, title = {BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {726-736} }
PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Picard_2026_CVPR, author = {Picard, David and Dufour, Nicolas and Degeorge, Lucas and Ghosh, Arijit and Allegro, Davide and Ravaud, Tom and Perron, Yohann and Sautier, Corentin and Baltaci, Zeynep Sonat and Meng, Fei and Kalleli, Syrine and L{\'o}pez-Rauhut, Marta and Loiseau, Thibaut and Albouy, S{\'e}gol{\`e}ne and Baena, Raphael and Vincent, Elliot and Landrieu, Loic}, title = {PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2544-2553} }
D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenlun and Zhong, Yunshan and Ding, Zihao and Li, Xinyu and Yoshioka, Kentaro}, title = {D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2978-2987} }
CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hangyu and Cao, Bofeng and Liang, Zhaohui and Li, Wuzhen and Oh, Juyoung and Chen, Yuxuan and Liang, Shixiao and Zhou, Hang and Ma, Chengyuan and Liu, Jiaxi and Li, Zheng and Zhang, Peng and Long, Keke and Liu, Maolin and Jiang, Jackson and Yu, Chunlei and Liu, Shengxiang and Yu, Hongkai and Li, Xiaopeng}, title = {CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2294-2303} }
RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Liu and Wang, Xiaofeng and Zhao, Guosheng and Li, Keyu and Qin, Wenkang and Zhu, Jiagang and Qiu, Jiaxiong and Huang, Guan and Su, Zhizhong}, title = {RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1410-1420} }
SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360° Video Saliency Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kao and Song, Tao and Hu, Zhihua and Li, Ming and Ding, Xin}, title = {SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360{$^\circ$} Video Saliency Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2596-2605} }
VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2026_CVPR, author = {Pang, Bo and Xu, Chenxi and Ren, Jierui and Wang, Guoping and Li, Sheng}, title = {VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2028-2037} }
RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies-
[pdf]
[supp]
[bibtex]@InProceedings{Garcia-Cobo_2026_CVPR, author = {Garcia-Cobo, Guillermo and Igl, Maximilian and Karkus, Peter and Zhang, Zhejun and Watson, Michael and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco}, title = {RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1000-1009} }
Self-Evolving 3D Scene Generation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Kaizhi and Fan, Yue and Gu, Jing and Xu, Zishuo and He, Xuehai and Wang, Xin Eric}, title = {Self-Evolving 3D Scene Generation from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {579-590} }
SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jikai and Gui, Xingtai and Gong, Jiahao and Tan, Feiyang and Han, Wencheng and Xu, Cheng-Zhong and Shen, Jianbing}, title = {SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1039-1049} }
Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Brahma_2026_CVPR, author = {Brahma, Debarshi and Biswas, Soma}, title = {Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {656-665} }
RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kilinc_2026_CVPR, author = {Kilinc, Ozsel and Tarhan, Cem}, title = {RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1159-1169} }
MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Changho and Kim, Minho and Kim, Jinkyu}, title = {MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2659-2668} }
RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yangfan and Zhang, Hanwei and Huang, Ke and Wang, Qiufeng and Shao, Zhenzhou and Wu, Dengyu}, title = {RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1492-1502} }
PolyReal: A Benchmark for Real-World Polymer Science Workflows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Wanhao and Wang, Weida and Xie, Jiaqing and Yang, Suorong and Wang, Jue and Chen, Benteng and Mei, Guangtao and Yang, Zonglin and Zhang, Shufei and Mo, Yuchun and Cheng, Lang and Zeng, Jin and Li, Houqiang and Ouyang, Wanli and Li, Yuqiang}, title = {PolyReal: A Benchmark for Real-World Polymer Science Workflows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1954-1964} }
Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Linsi and Shen, Gang and Lv, Xuefei and Wu, Chenglong and Pei, Yuru}, title = {Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2669-2679} }
On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Ali_2026_CVPR, author = {Ali, Ziad Tariq Muhammad and Azad, Raja Muhammad Atif and Azad, Muhammad Ajmal and Rice, Iain and Daraz, Umar and Imran, Ali Shariq and Holyhead, James}, title = {On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {809-818} }
HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shurui and Chen, Weide and Wu, Ancong}, title = {HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {538-546} }
WildAni4D: Towards 4D Animal Mesh Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Gyeongsu and Hu, Hezhen and Soon, Donghyeon and Kang, Changwoo and Joo, Kyungdon}, title = {WildAni4D: Towards 4D Animal Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {160-169} }
PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yixin and Wang, Yan and Shao, Wenrui and Xie, Zhaoheng}, title = {PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2534-2543} }
LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rui_2026_CVPR, author = {Rui, Yicheng and Duan, Xiao-Wei and Deng, Licai and Yang, Fan and Dang, Zhengming and Du, Zhengjun and Peng, Junhao and Chu, Wenhao and Mahmut, Umut and Li, Kexin and Wu, Yiyun and Feng, Fabo}, title = {LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1774-1785} }
Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junyu and Harun, Md Yousuf and Kanan, Christopher}, title = {Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2284-2293} }
RefDrone: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zhichao and Liu, Yepeng and Su, Zhiling and Zhu, Huachao and Gu, Yuliang and Zou, Yuda and Liu, Zelong and Xia, Gui-Song and Du, Bo and Xu, Yongchao}, title = {RefDrone: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1752-1762} }
ProDiG: Progressive Diffusion-Guided Gaussian Splatting for Aerial to Ground Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mitra_2026_CVPR, author = {Mitra, Sirshapan and Rawat, Yogesh S}, title = {ProDiG: Progressive Diffusion-Guided Gaussian Splatting for Aerial to Ground Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {22-32} }
Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Feng and Gou, Chenhui and He, Yefei and Yang, Yang and Zhuang, Bohan and Wu, Qi}, title = {Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3050-3059} }
Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Zhiqiu and Mu, Furong and Li, Qi and Zhang, Shanshan and Gui, Jie and Wang, Chunpeng and Liu, Yunan}, title = {Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1650-1659} }
A Simple Framework for Visual Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Johnson_2026_CVPR, author = {Johnson, Faith and Cao, Bryan Bo and Jain, Shubham and Ashok, Ashwin and Dana, Kristin}, title = {A Simple Framework for Visual Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3167-3177} }
Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Yiwei and Chen, Zan and Wang, Bo and Zhou, Xiaofei}, title = {Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2324-2334} }
UniVerse3D: Emerging Properties of Unified Multimodal Models in 3D Understanding and Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Junliang and Huang, Zehuan and Qu, Yansong and Wang, Chunshi and Yang, Yunhan and Li, Yang and Luo, Yawei and Chen, Zhuo and Lu, Sheng and Zhu, Jun and Guo, Chunchao}, title = {UniVerse3D: Emerging Properties of Unified Multimodal Models in 3D Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {613-623} }
Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday QR Codes-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashish and S, Aarthi and Agarwal, Akshay}, title = {Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday QR Codes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {857-866} }
Beyond 3D Geometry: M3FD, a Large-Scale Dataset and Benchmark for Multimodal 3D Perceptual Understanding-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Huan and Chen, Ping and Chen, Zezhou and Liu, Zhaoxiang and Wang, Zipeng and Liu, Xiang and Wang, Xin and Wang, Kai and Lian, Shiguo}, title = {Beyond 3D Geometry: M3FD, a Large-Scale Dataset and Benchmark for Multimodal 3D Perceptual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1986-1995} }
FedCVC: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Jinshan and Huang, Tingxuan and Jiang, Baoyang and Xiang, Liuyu and Ma, Qiang and Hu, Jianwei}, title = {FedCVC: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2514-2523} }
Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawachi_2026_CVPR, author = {Kawachi, Hodaka and Nakamura, Tomoya and Santo, Hiroaki and Tedla, SaiKiran Kumar and Canham, Trevor D and Yagi, Yasushi and Brown, Michael S.}, title = {Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1273-1282} }
AvatarMix: Identity-Preserving Cross-Avatar Composition for Outfit Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhaorong and Kanamori, Yoshihiro and Endo, Yuki}, title = {AvatarMix: Identity-Preserving Cross-Avatar Composition for Outfit Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {425-435} }
DeepFakeShield: A Proactive Defense Against Malicious Face Swapping-
[pdf]
[supp]
[bibtex]@InProceedings{Karimi-Bidhendi_2026_CVPR, author = {Karimi-Bidhendi, Saeed and DeGol, Joseph and Wengrowski, Eric and Roberts, Dominic and Dana, Kristin}, title = {DeepFakeShield: A Proactive Defense Against Malicious Face Swapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {867-877} }
FREE-Switch: Frequency-Based Dynamic LoRA Switch for Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Shenghe and Zhang, Minyu and Liu, Tianhao and Wang, Hongzhi}, title = {FREE-Switch: Frequency-Based Dynamic LoRA Switch for Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2793-2802} }
VEBench: Benchmarking Large Multimodal Models for Real-world Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Andong and Du, Dawei and Chen, Zhenfang and Zhong, Wen and Chen, Fan and Chen, Guang and Kuo, Chia-Wen and Wen, Longyin and Chen, Chen and Zhu, Sijie}, title = {VEBench: Benchmarking Large Multimodal Models for Real-world Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2187-2196} }
Qinling-GFFE: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhenhe and Cao, Congqi and Hu, Lanshu and Pan, Liujie},
  title     = {{Qinling-GFFE}: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2554--2563},
}
ShelfGaussian: Shelf-Supervised Open-Vocabulary Gaussian-Based 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lingjun and Luo, Yandong and Hays, James and Gan, Lu},
  title     = {{ShelfGaussian}: Shelf-Supervised Open-Vocabulary {Gaussian}-Based {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1398--1409},
}
PEPR: Privileged Event-based Predictive Regularization for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magrini_2026_CVPR,
  author    = {Magrini, Gabriele and Becattini, Federico and Biondi, Niccol{\`o} and Pala, Pietro},
  title     = {{PEPR}: Privileged Event-based Predictive Regularization for Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3209--3219},
}
DRA: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting-
[pdf]
[bibtex]@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Minwei and Wei, Yang and Xiao, Junhao and Bi, Xiuli and Xiao, Bin},
  title     = {{DRA}: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {779--788},
}
CLLAP: Contrastive Learning-based LiDAR-Augmented Pretraining for Enhanced Radar-Camera Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Bingyi and Zhu, Chuanhui and Xue, Hongfei and Teng, Jian and Liu, Jipeng and Wang, Enshu and Dai, Penglin and Wang, Pu},
  title     = {{CLLAP}: Contrastive Learning-based {LiDAR}-Augmented Pretraining for Enhanced Radar-Camera Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {99--108},
}
Devil is in Narrow Policy: Unleashing Exploration in Driving VLA Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Canyu and Yang, Yuguang and Tan, Zhewen and Wang, Yizhi and Zhan, Ruiyi and Liu, Haiyan and Mao, Xuanyao and Bao, Jason and Tang, Xinyue and Yang, Linlin and Sun, Bingchuan and Wang, Yan and Zhang, Baochang},
  title     = {Devil is in Narrow Policy: Unleashing Exploration in Driving {VLA} Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1062--1072},
}
APC: Transferable and Efficient Adversarial Point Counterattack for Robust 3D Point Cloud Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Geunyoung and Kim, Soohong and Kong, Inseok and Jung, Jiyoung},
  title     = {{APC}: Transferable and Efficient Adversarial Point Counterattack for Robust {3D} Point Cloud Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {789--798},
}
Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Cong and Cheng, Gong},
  title     = {Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3040--3049},
}
Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yachan and Lu G{\'o}mez, Jose and Xue, Danna and Xiao, Yi and L{\'o}pez, Antonio M.},
  title     = {Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3231--3240},
}
AR4D: Autoregressive 4D Generation from Monocular Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Hanxin and He, Tianyu and Chen, Zhibo},
  title     = {{AR4D}: Autoregressive {4D} Generation from Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {88--98},
}
HelixTrack: Event-Based Tracking and RPM Estimation of Propeller-like Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Spetlik_2026_CVPR,
  author    = {Spetlik, Radim and Pliska, Michal and Vrba, Vojt{\v{e}}ch and Matas, Ji{\v{r}}{\'\i}},
  title     = {{HelixTrack}: Event-Based Tracking and {RPM} Estimation of Propeller-like Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3200--3208},
}
OmniDrive-R1: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhenguo and Zheng, Haohan and Wang, Yishen and Xu, Le and Deng, Tianchen and Chen, Xuefeng and Chen, Qu and Zhang, Bo and Huang, Wuxiong},
  title     = {{OmniDrive-R1}: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1106--1116},
}
OminiMAG-SLAM: Unified Online Dual Graph Optimization for Multi-Agent Gaussian SLAM-
[pdf]
[bibtex]@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Leqian and Li, Caibo and Guo, Yu and Wang, Fei},
  title     = {{OminiMAG-SLAM}: Unified Online Dual Graph Optimization for Multi-Agent {Gaussian} {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1431--1440},
}
Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyao and Zhang, Kaipeng and Shieh, Michael Qizhe},
  title     = {Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1230--1239},
}
PEARL: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Maleki_2026_CVPR,
  author    = {Maleki, Armin and Radha, Hayder},
  title     = {{PEARL}: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1138--1147},
}
UNIFORM: Unifying Knowledge from Large-scale and Diverse Pre-trained Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yimu and Zhuang, Weiming and Chen, Chen and Huang, Jiabo and Li, Jingtao and Lyu, Lingjuan},
  title     = {{UNIFORM}: Unifying Knowledge from Large-scale and Diverse Pre-trained Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2904--2914},
}
Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hassani_2026_CVPR,
  author    = {Hassani, Ali and Zhou, Fengzhe and Kane, Aditya and Huang, Jiannan and Chen, Chieh-Yun and Shi, Min and Walton, Steven and Hoehnerbach, Markus and Thakkar, Vijay and Isaev, Mikhail and Zhang, Qinsheng and Xu, Bing and Wu, Haicheng and Hwu, Wen-mei and Liu, Ming-Yu and Shi, Humphrey},
  title     = {Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3009--3018},
}
SciGA: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawada_2026_CVPR,
  author    = {Kawada, Takuro and Kitada, Shunsuke and Nemoto, Sota and Iyatomi, Hitoshi},
  title     = {{SciGA}: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2250--2260},
}
InEdit-Bench: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Zhiqiang and Han, Xumeng and Zhang, Zhiwei and Xiong, Zenghui and Ding, Yifan and Ping, Aoxiang and Li, Xiang and Guo, Tong and Mao, Yao},
  title     = {{InEdit-Bench}: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2176--2186},
}
RiGS: Rigid-aware 4D Gaussian Splatting from a Single Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chenyu and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter},
  title     = {{RiGS}: Rigid-aware {4D} {Gaussian} Splatting from a Single Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {547--557},
}
JetViT: Efficient High-Resolution Vision Transformer with Post-Training Attention Search-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Dongyun and Zhang, Zhuoyang and Chen, Junyu and He, Wenkun and Peng, Qinhe and Ye, Hanrong and Lu, Yao and Yin, Hongxu and Wang, Yu and Han, Song and Cai, Han},
  title     = {{JetViT}: Efficient High-Resolution Vision Transformer with Post-Training Attention Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2957--2967},
}
Three-Step Conditional Diffusion 3D Reconstruction for Light-Field Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Qihong and Yan, Shaokang and Qiao, Zhimin and Wang, Jinjia and Xiong, Bo},
  title     = {Three-Step Conditional Diffusion {3D} Reconstruction for Light-Field Microscopy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {478--487},
}
Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Chenhao and Zhu, Yichen and Wen, Junjie and Chen, Yefei and Liu, Ziang and Fang, Faming},
  title     = {Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1388--1397},
}
FedVG: Gradient-Guided Aggregation for Enhanced Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Devkota_2026_CVPR,
  author    = {Devkota, Alina and Thrasher, Jacob and Adjeroh, Donald and Bhattarai, Binod and Gyawali, Prashnna K.},
  title     = {{FedVG}: Gradient-Guided Aggregation for Enhanced Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2803--2812},
}
iTCTSL: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2026_CVPR,
  author    = {Mu, Pan and Zhu, Yuchao and Zhang, Shiqi and Yan, Hanting and Zhang, Jinglin and Bai, Cong},
  title     = {{iTCTSL}: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1587--1596},
}
Native3D: End-to-End 3D Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yibo and Zhang, Ziwei and Pang, Haozhou and Li, Menghao and He, Lanshan and Qi, Gan},
  title     = {{Native3D}: End-to-End {3D} Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {381--390},
}
Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuohao and Li, Zeng and Zhang, Yifei and Liu, Chang and Zhou, Yu},
  title     = {Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1577--1586},
}
Retrieval-VLA: Training-Free In-Context Adaptation for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yue and Wang, Rui and Lin, Jiehong and Wang, Zhongrui and Qi, Xiaojuan},
  title     = {{Retrieval-VLA}: Training-Free In-Context Adaptation for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1358--1367},
}
SwiftVGGT: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jungho and Lee, Minhyeok and Yang, Sunghun and Kang, Minseok and Lee, Sangyoun},
  title     = {{SwiftVGGT}: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {447--456},
}
Affine Bases for Affine Spaces-
[pdf]
[supp]
[bibtex]@InProceedings{Dogadov_2026_CVPR,
  author    = {Dogadov, Gabriel and Alexa, Marc},
  title     = {Affine Bases for Affine Spaces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {213--222},
}
A2Z-10M+: Geometric Deep Learning with A-to-Z BRep Annotations for AI-Assisted CAD Modeling and Reverse Engineering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jena_2026_CVPR,
  author    = {Jena, Pritham K and Baburaj, Bhavika and Anand, Tushar and Dutta, Vedant and Ulavala, Vineeth and Ali, Sk Aziz},
  title     = {{A2Z-10M+}: Geometric Deep Learning with {A-to-Z} {BRep} Annotations for {AI}-Assisted {CAD} Modeling and Reverse Engineering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1913--1923},
}
CoRT-Predictor: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yanlin and Liu, Yuchen and Liu, Mingren},
  title     = {{CoRT-Predictor}: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1117--1127},
}
The DeepSpeak Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barrington_2026_CVPR,
  author    = {Barrington, Sarah and Bohacek, Maty and Farid, Hany},
  title     = {The {DeepSpeak} Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1893--1902},
}
ELSA: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsu_2026_CVPR,
  author    = {Hsu, Chih-Chung and Ma, Xin-Di and Liao, Wo-Ting and Lee, Chia-Ming},
  title     = {{ELSA}: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2988--2997},
}
When Interpretability Becomes a Liability: Adversarial Attacks on CBM Concept Layers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sridhar_2026_CVPR,
  author    = {Sridhar, Aditya},
  title     = {When Interpretability Becomes a Liability: Adversarial Attacks on {CBM} Concept Layers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {829--836},
}
Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Purohit_2026_CVPR,
  author    = {Purohit, Vishal and Chen, Wei and Qiu, Qiang},
  title     = {Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1263--1272},
}
Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xuepei and Feng, Mingtao and Dong, Weisheng and Chen, Lin and Feng, Jie and Wu, Fangfang and Zhu, Yufan and Mian, Ajmal Saeed},
  title     = {Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {942--951},
}
Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Jeongwan and Kim, Jaehyeon and Ko, Donguk and Choi, Jaeho},
  title     = {Can Language Models Understand {mmWave} Data? Benchmarking Large Language Models for {mmWave} Radar-Based Human Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2208--2219},
}
FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Chaoyi and Wang, Run and Luo, Feng and Pes{\'e}, Mert D. and Fan, Zhiwen and Zhong, Yiqi and Huang, Siyu},
  title     = {{FF3R}: Feedforward Feature {3D} Reconstruction from Unconstrained views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {129--138},
}
HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soroco_2026_CVPR,
  author    = {Soroco, Mauricio and Pittaluga, Francesco and Tasneem, Zaid and Aich, Abhishek and Zhuang, Bingbing and Chen, Wuyang and Chandraker, Manmohan and Jiang, Ziyu},
  title     = {{HorizonWeaver}: Generalizable Multi-Level Semantic Editing for Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {952--959},
}
CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Pingrui and Zhou, Yanshan and Xie, Zihao and Yang, Hua},
  title     = {{CrowdVerse}: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2197--2207},
}
Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jeonghwan and Kim, Wontaek and Lu, Yidan and Cheng, Jin and Zargarbashi, Fatemeh and Zeng, Zicheng and Qi, Zekun and Dou, Zhiyang and Sontakke, Nitish and Baek, Donghoon and Yi, Li and Ha, Sehoon and Li, Tianyu},
  title     = {{Switch-JustDance}: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1421--1430},
}
When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanhui and Zhou, Qi and Xu, Zhihong and Guo, Huizhong and Wang, Wenhai and Wang, Dongxia},
  title     = {When Harmful Content Goes Invisible: Unveiling Perception Failure of {LVLMs} with {CAMOUHARMTI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2038--2048},
}
See Tomorrow, Act Today: Foresight-Driven Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bozhou and Song, Nan and Wang, Yuang and Deng, Jiankang and Zhu, Xiatian and Zhang, Li},
  title     = {See Tomorrow, Act Today: Foresight-Driven Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1180--1190},
}
GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Wan, Liang and Feng, Wei},
  title     = {{GOVTrack}: Towards Generative Open-Vocabulary Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1872--1882},
}
Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingjie and Kim, Edward and Zhao, Yue and Adeli, Ehsan and Pohl, Kilian M.},
  title     = {Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain {MRI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1211--1220},
}
RoadTones: Tone Controllable Text Generation from Road Event Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parikh_2026_CVPR,
  author    = {Parikh, Chirag and Lipare, Siddhi Pravin and Sarvadevabhatla, Ravi Kiran},
  title     = {{RoadTones}: Tone Controllable Text Generation from Road Event Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1019--1028},
}
SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Inadumi_2026_CVPR,
  author    = {Inadumi, Shun and Tanaka, Shohei and Hirasawa, Tosho and Hashimoto, Atsushi and Yoshino, Koichiro and Ushiku, Yoshitaka},
  title     = {{SciPostGen}: Bridging the Gap between Scientific Papers and Poster Layouts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2131--2141},
}
Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Tan, Hao and Wang, Peng and Xu, Zexiang and Fuxin, Li},
  title     = {{Long-LRM++}: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {370--380},
}
Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Wanying and Chen, Zhuo and Lu, Jianzhi and Ma, Chenxi and Tan, Weimin and Yan, Bo},
  title     = {{Hi3Doc}: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2721--2730},
}
HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion-
[pdf]
[bibtex]@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Aihua and Yang, Jun and Liu, Yong-Jin and He, Ying},
  title     = {{HEDA}: Hyperbolic-{Euclidean} Dual Adaptation for Robust Real-World Point Cloud Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {149--159},
}
DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yani and Wu, Dongming and Shi, Hao and Liu, Yingfei and Wang, Tiancai and Dong, Xingping},
  title     = {{DEGround}: An Effective Baseline for Ego-centric {3D} Visual Grounding With a Homogeneous Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3103--3113},
}
Shape and Texture Recognition in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Eppel_2026_CVPR,
  author    = {Eppel, Sagi and Bismut, Mor and Strugatski, Alona},
  title     = {Shape and Texture Recognition in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1839--1849},
}
TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Honghui and Fang, Chuangjie and Meng, Yiqun and Jiang, Jiawei and Chan, Sixian and Zhang, Shiqing and Zheng, Jianwei},
  title     = {{TPTransformer}: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1670--1679},
}
An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengzhou and Tang, Qiling and Chai, XinYu and Liu, Rong and Li, Zhi and Liu, Liman},
  title     = {An Interpretable {Alzheimer's} Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3241--3251},
}
M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haolong and Tan, Kaijun and Shen, Yeqing and Huang, Xin and Wang, Jia and Ge, Zheng and Zhang, Xiangyu and Li, Si and Jiang, Daxin},
  title     = {{M-DocSum}: Do {LVLMs} Genuinely Comprehend Interleaved Image-Text in Document Summarization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2731--2741},
}
Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Wenbing and Liang, Jianing and Cheng, Linjie and Pan, Yurui and Chen, Zhuhao and Yan, Qingwang and Cheng, Yudong and Zhang, Jianghui and Chi, Mingmin and Peng, Bo},
  title     = {{Real-IAD MVN}: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2060--2068},
}
AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yulu and Cheng, Jiujun and Wang, Haowen and Suo, Dengyang and Ren, Pei and Mao, Qichao and Gao, Shangce and Huang, Yakun},
  title     = {{AOMGen}: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3082--3091},
}
PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiangyu and Wang, Chen and Liu, Yumao and He, Dengbo and Zhang, Jiahao and Ma, Ke},
  title     = {{PAVE}: An End-to-End Dataset for Production Autonomous Vehicle Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1010--1018},
}
OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zhishan and Wei, Siyuan and Wang, Zengran and Wang, Chunjie and Yan, Xiaosheng and Liu, Xiao},
  title     = {{OpenTrack3D}: Towards Accurate and Generalizable Open-Vocabulary {3D} Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {233--242},
}
RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jingqi and Lu, Jingxi and Li, Chenghao and Sarkar, Sreetama and Kundu, Souvik and Beerel, Peter A.},
  title     = {{RedVTP}: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2783--2792},
}
Tiny Inference-Time Scaling with Latent Verifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bucciarelli_2026_CVPR,
  author    = {Bucciarelli, Davide and Turri, Evelyn and Baraldi, Lorenzo and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Tiny Inference-Time Scaling with Latent Verifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2873--2882},
}
THEval. Evaluation Framework for Talking Head Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quignon_2026_CVPR,
  author    = {Quignon, Nabyl and Chopin, Baptiste and Wang, Yaohui and Dantcheva, Antitza},
  title     = {{THEval}. Evaluation Framework for Talking Head Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1943--1953},
}
Jailbreaking Frontier Foundation Models Through Intention Deception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinhe and Sycara, Katia and Xie, Yaqi}, title = {Jailbreaking Frontier Foundation Models Through Intention Deception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {666-674} }
Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes-
[pdf]
[supp]
[bibtex]@InProceedings{Ran_2026_CVPR, author = {Ran, Weihang and Zhu, Qingtian and Cao, Mingdeng and Yuan, Wei and Echizen, Isao and Zheng, Yinqiang}, title = {Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1597-1607} }
Rethinking Compact (<1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions-
[pdf]
[supp]
[bibtex]@InProceedings{Kyrkou_2026_CVPR, author = {Kyrkou, Christos}, title = {Rethinking Compact (\ensuremath{<}1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2710-2720} }
Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Nagy_2026_CVPR, author = {Nagy, Rajmund and Voss, Hendric and Hoang-Minh, Thanh and Tsakov, Mihail and Nikolov, Teodor and Zhang, Zeyi and Ao, Tenglong and Yang, Sicheng and Huang, Shaoli and Cheng, Yongkang and Mughal, M. Hamza and Dabral, Rishabh and Chhatre, Kiran and Theobalt, Christian and Liu, Libin and Kopp, Stefan and McDonnell, Rachel and Neff, Michael and Kucherenko, Taras and Yoon, Youngwoo and Henter, Gustav Eje}, title = {Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2152-2164} }
Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xuanming and An, Baoyi and Nie, Dingyu and Ren, Haoyu and Zou, Zhengwei and Yang, Yizhe and Shen, Jialie and Jin, Zhiwen and Qian, Xueming and Yang, Zhongyu and Zhao, Guoshuai}, title = {Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1608-1617} }
SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhuohang and Yuan, Xu and Qu, Haohao and Lin, Shanru and Liu, Kanglong and Fan, Wenqi and Li, Qing}, title = {SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2165-2175} }
MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junming and Ji, Yifei and Han, Yongxuan and Zheng, Zhenzhe}, title = {MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2926-2936} }
Catalyst: Out-of-Distribution Detection via Elastic Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hassan_2026_CVPR, author = {Hassan, Abid and Ngo, Tuan and Shafiq, Saad and Medvidovic, Nenad}, title = {Catalyst: Out-of-Distribution Detection via Elastic Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1618-1628} }
Multimodal Large Language Models as Image Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kisel_2026_CVPR, author = {Kisel, Nikita and Volkov, Illia and Janouskova, Klara and Matas, Jiri}, title = {Multimodal Large Language Models as Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1711-1720} }
PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Jie and Wu, JunXiang and An, Nan and Zhang, Zhen and Xiang, Shuiying and Zhang, Mingjin and Li, Yunsong and Gao, Yu'e}, title = {PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2367-2376} }
SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Fudong and Liu, Dingning and Wang, Hanshi and Zhang, Yiwei and Gao, Jin and Hu, Weiming and Zhang, Zhipeng}, title = {SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {900-909} }
CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tung-I and Wang, Lingdong and Maji, Subhransu and Sitaraman, Ramesh K.}, title = {CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {457-467} }
Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Ji Woong and Wang, Ke and Fu, Zipeng and Chen, Sirui and Zhao, Cong and Lai, Jeff and Finn, Chelsea}, title = {Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1515-1524} }
SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tanaka_2026_CVPR, author = {Tanaka, Shohei and Hashimoto, Atsushi and Ushiku, Yoshitaka}, title = {SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2753-2762} }
Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs-
[pdf]
[supp]
[bibtex]@InProceedings{Gandhi_2026_CVPR, author = {Gandhi, Mona and Joseph, K.J. and Parthasarathy, Srinivasan and Nag, Sayan}, title = {Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1786-1796} }
Learning a Particle Dynamics Model with Real-World Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Chanho and Sumukh, Suhas V. and Li, Fuxin}, title = {Learning a Particle Dynamics Model with Real-World Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {498-507} }
Guided Lensless Polarization Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kraicer_2026_CVPR, author = {Kraicer, Noa and Yosef, Erez and Giryes, Raja}, title = {Guided Lensless Polarization Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1252-1262} }
DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Piland_2026_CVPR, author = {Piland, Jacob and Sweet, Christopher and Czajka, Adam}, title = {DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1201-1210} }
dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yingzi and Cao, Yulong and Ding, Wenhao and Zhang, Shuibai and Wang, Yan and Ivanovic, Boris and Jiang, Ming and Pavone, Marco and Xiao, Chaowei}, title = {dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1050-1061} }
GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xiaolei and Fang, Chuangjie and Wu, Jie and Yang, Jingyi and Lin, Boyi and Zheng, Jianwei}, title = {GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {243-252} }
Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Pu and Li, Huafeng and Zhang, Yafei and Liu, Yu and Wang, Wen}, title = {Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1304-1313} }
CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sammani_2026_CVPR, author = {Sammani, Fawaz and Fischer, Jonas and Deligiannis, Nikos}, title = {CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3262-3272} }
From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Fei and Liu, Yu and Tang, Luyang and Sun, Mingchao and Ge, Zengye and Bu, Rui and Jin, Yuchao and Zhao, Haisen and Sun, He and Li, Yangyan and Xu, Mu and Chen, Wenzheng and Chen, Baoquan}, title = {From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {391-402} }
From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yukang and Wu, Wenxiao and Sun, Jianwen and Li, Chuanhao and Zhang, Fanrui and Li, Zizhen and Ai, Jiaxin and Zhou, Sizhuo and Chang, Yifan and Gao, Changxin and Zhang, Shenglin and Zhang, Kaipeng}, title = {From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2220-2229} }
M^3A Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Hu, Yuxuan and Geng, Haoran and Chen, Xiangyu and Zhou, Chuhao and Cui, Ziteng and Yang, Jianfei}, title = {{M$^3$A} Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3070-3081} }
Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xian-Hua}, title = {Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1294-1303} }
WGS: Watertight Geometry Standardization for Scalable 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Dehao and Jia, Tanghui and Zhang, Kaiyi and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Zhang, Runze and Zhu, Lingting and Yuan, Li and Wang, Xin and Quan, Long}, title = {WGS: Watertight Geometry Standardization for Scalable 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {569-578} }
ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Boran and Wang, Xinming and Chen, Yi and Li, Xiang and Xu, Jian and Yuan, Jing and Liu, Cheng-Lin}, title = {ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2773-2782} }
Dual Strategies for Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phuong_2026_CVPR, author = {Phuong, Nam Nguyen and Minh, Duc Nguyen The and Le Nguyen, Phi and Abbasnejad, Ehsan and Hoai, Minh}, title = {Dual Strategies for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2483-2492} }
CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jindal_2026_CVPR, author = {Jindal, Akshit and Anand, Saket and Arora, Chetan and Goyal, Vikram}, title = {CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {716-725} }
The Mechanics of CNN Filtering with Rectification-
[pdf]
[supp]
[bibtex]@InProceedings{Frija-Altarac_2026_CVPR, author = {Frija-Altarac, Liam and Toews, Matthew}, title = {The Mechanics of CNN Filtering with Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1690-1699} }
AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging-
[pdf]
[bibtex]@InProceedings{Bazarvaani_2026_CVPR, author = {Bazarvaani, Zuchi and Lee, Seung-Ho and Ahn, Jeongmin and Jeon, Donghyeon and Kang, Inho and Na, Seung-Hoon}, title = {AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2700-2709} }
Re^2MoGen: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jiakun and Xiao, Ting and Cao, Shiqin and Li, Xinran and Wang, Zhe and Bai, Chenjia}, title = {{Re$^2$MoGen}: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1378-1387} }
Fast Generative DeOcclusion for Visual Geometry and Robotics-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jieneng and Zhang, Tiezheng and Xuan, Xiwei and He, Ju and Yin, Yifan and Shi, Haojun and Ye, Suyu and Li, Xinyi and Yuan, Ruisheng and Shu, Tianmin and Yuille, Alan}, title = {Fast Generative DeOcclusion for Visual Geometry and Robotics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1314-1324} }
LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map-
[pdf]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Wei and Wang, Xiaohan and Liu, Yuehu}, title = {LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1462-1471} }
VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yu and Cheng, Chong and Yu, Sicheng and Guo, Xiaoyang and Wang, Hao}, title = {VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {414-424} }
Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Kaiyuan and Xie, Shuangyu and Goldberg, Andrew and Goldberg, Ken}, title = {Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1452-1461} }
Object Pose Transformer: Unifying Unseen Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weihang and Garattoni, Lorenzo and Despinoy, Fabien and Navab, Nassir and Busam, Benjamin}, title = {Object Pose Transformer: Unifying Unseen Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {436-446} }
Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junyang and Ni, Haomin and Lai, Hanjiang}, title = {Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {819-828} }
Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jie and Zhao, Na}, title = {Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {327-337} }
Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noguchi_2026_CVPR, author = {Noguchi, Chihiro and Yamamoto, Takaki}, title = {Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1096-1105} }
Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media-
[pdf]
[supp]
[bibtex]@InProceedings{K.M_2026_CVPR, author = {K.M, Megha Mariam and Balasubramanian, Vineeth N. and Jawahar, C.V.}, title = {Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2079-2088} }
Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Powei and Abe, Jiro and Ogura, Kazumine}, title = {Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {109-118} }
Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yujie and Qin, Haotong and Zhang, Zhang and Magno, Michele and Benini, Luca and Li, Yawei}, title = {Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2524-2533} }
Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ro_2026_CVPR, author = {Ro, Yusung and Choi, Jaehyun and Kim, Junmo}, title = {Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3252-3261} }
Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs-
[pdf]
[bibtex]@InProceedings{chen_2026_CVPR, author = {Chen, Rongqian and Andreyev, Allison and Xiu, Yanming and Chilukuri, Joshua and Sen, Shunav and Imani, Mahdi and Li, Bin and Gorlatova, Maria and Tan, Gang and Lan, Tian}, title = {Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {799-808} }
WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lezhong and Kaya, Mehmet Onurcan and Bigdeli, Siavash Arjomand and Frisvad, Jeppe Revall}, title = {WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2007-2016} }
Context-Aware Semantic Segmentation via Stage-Wise Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Carreaud_2026_CVPR, author = {Carreaud, Antoine and Naha, Elias and Chansel, Arthur and Lahellec, Nina and Skaloud, Jan and Gressin, Adrien}, title = {Context-Aware Semantic Segmentation via Stage-Wise Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2680-2690} }
Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xincheng and Sun, Hanchi and Sun, Wenjun and Xue, Kejun and Zhou, Wangqiu and Zhang, Jianbo and Sun, Wei and Zhu, Dandan and Min, Xiongkuo and Jia, Jun and Fang, Zhijun}, title = {Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2230-2239} }
Paper2SysArch: Structure-Constrained System Architecture Generation from Scientific Papers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Ziyi and Liu, Zhou and Zhang, Wentao}, title = {Paper2SysArch: Structure-Constrained System Architecture Generation from Scientific Papers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1996-2006} }
OutSafe-Bench: A Benchmark for Multimodal Offensive Content Detection in Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Yuping and Xie, Yuhan and Li, Yuanshuai and Yu, Yingchao and Lyu, Lingjuan and Jin, Yaochu}, title = {OutSafe-Bench: A Benchmark for Multimodal Offensive Content Detection in Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1965-1975} }
ProGIC: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Hao and Liang, Chengbin and Guo, Wenqi and Qin, Zhijin and Han, Jungong}, title = {ProGIC: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2915-2925} }
In2CLR: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yunxuan and Liu, Bohao and Wu, Yanxia and Li, Rongsheng}, title = {In2CLR: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2398-2407} }
Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xiankang and Guo, Dongyan and Li, Hongji and Cui, Ying and Weng, Libo and Li, Ruibo and Zhang, Chi}, title = {Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {591-601} }
PlanGS: Active 3D Gaussian Reconstruction with Real-Time Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Wenxiang and Chen, Anpei and Yu, Haoming and Shen, Yujun and Xu, Weiwei}, title = {PlanGS: Active 3D Gaussian Reconstruction with Real-Time Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3156-3166} }
Image Classification Using CNN-QNN Hybrid Model with Optimized Correlated Features-
[pdf]
[bibtex]@InProceedings{Seong_2026_CVPR, author = {Seong, Minseo and Kim, Youngwook}, title = {Image Classification Using CNN-QNN Hybrid Model with Optimized Correlated Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2473-2482} }
Name That Part: 3D Part Segmentation and Naming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paul_2026_CVPR, author = {Paul, Soumava and Kaushik, Prakhar and Vaidya, Ankit and Bhattad, Anand and Yuille, Alan}, title = {Name That Part: 3D Part Segmentation and Naming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1808-1817} }
FinChart-Multimodal: A Dataset for Context-Injected Financial Chart Understanding with Aligned OHLCV Time Series-
[pdf]
[bibtex]@InProceedings{Garg_2026_CVPR, author = {Garg, Devansh}, title = {FinChart-Multimodal: A Dataset for Context-Injected Financial Chart Understanding with Aligned OHLCV Time Series}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1934-1942} }
Intelligent Photo Retouching with Language Model-Based Artist Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Haoyu and Tao, Keda and Wang, YiZao and Wang, Xinlei and Zhu, Lei and Gu, Jinjin}, title = {Intelligent Photo Retouching with Language Model-Based Artist Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1240-1251} }
A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kaidong and Zhang, Jian and Xu, Rongtao and Sun, Yu and Wen, Youpeng and Xue, Shuoshuo and Guo, Xiaoyu and Guo, Minghao and Liufu, Weijia and Liu, Zihou and Ji, Kangyi and Li, Zihang and Chen, Ruiyi and Cao, Meng and Zhang, Jingming and Zhao, Shen and Chang, Xiaojun and Zheng, Feng and Laptev, Ivan and Liang, Xiaodan}, title = {A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1503-1514} }
3DFA: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Sizhe and Chi, Yankuan and Zhong, Shuhan and Chan, S.-H. Gary}, title = {3DFA: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {360-369} }
Dyna-ViT: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Rubab_2026_CVPR, author = {Rubab, Syeda Fiza and Ghaffar, Arslan Abdul and Gul, Malik Junaid Jami and Murtala, Sheriff and Lee, Ingyu and Choi, Gyu Sang}, title = {Dyna-ViT: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2844-2851} }
Phantasia: Context-Adaptive Backdoors in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Nam Duong and Le Nguyen, Phi}, title = {Phantasia: Context-Adaptive Backdoors in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {695-704} }
DR-DPO: Dual-Regularized DPO for Efficient Dataset Condensation-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Wang, Guansu and Ren, Pengju}, title = {DR-DPO: Dual-Regularized DPO for Efficient Dataset Condensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2111-2120} }
Efficient Document Parsing via Parallel Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Lei and Zhao, Ze and Li, Meng and Lun, Zhongwang and Yuan, Yi and Lu, Xingjing and Wei, Zheng and Bian, Jiang and Li, Zang}, title = {Efficient Document Parsing via Parallel Token Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2763-2772} }
Texture-Guided Multiscale Cross-Modal Fusion for AI-Generated Image Quality Assessment-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Qinlin and Zhou, Mingliang and Liao, Xingran}, title = {Texture-Guided Multiscale Cross-Modal Fusion for AI-Generated Image Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2629-2638} }
NSGuard: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Lizhi and Feng, Jianguo and Li, Ziqiang and Li, Jun and Jiang, Weiwei and Fu, Zhangjie}, title = {NSGuard: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {675-684} }
4D E-SloMo: 4D Reconstruction for High Speed Scene using a Hybrid RGB-Event Multi-View System-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Bo and Dai, Jun and Chen, Yutian and Xu, Linning and Yu, Mulin and Wang, Yujin and Guo, Shi and Le, Xinyi and Xue, Tianfan}, title = {4D E-SloMo: 4D Reconstruction for High Speed Scene using a Hybrid RGB-Event Multi-View System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {43-53} }
ReaAct: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yanzhao and Ding, Yi and Tang, Peijun and Yang, Haotian and Qi, Xianbiao and Wang, Jianan and Wang, Xueqian}, title = {ReaAct: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1441-1451} }
Generalizable Human Gaussian Splatting via Multi-view Semantic Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jingi and Kim, Wonjun}, title = {Generalizable Human Gaussian Splatting via Multi-view Semantic Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {139-148} }
Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity 3D Reconstruction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Mingbo and Yan, Han and Zhang, Bolun and Ran, Wu and Ma, Chao}, title = {Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity 3D Reconstruction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {181-190} }
MolRecBench-Wild: A Real-World Benchmark for Optical Chemical Structure Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Haote and Wang, Hui and Zhu, Chen and Wang, Jingchao and Li, Linye and Lai, Hongbin and Ao, Huijie and Lv, Yongxuan and Wu, Jiang and Sun, Jiaxing and Chen, Lua and Cao, Yuanyuan and Zhang, Ruijie and Lu, Shengxin and Wu, Lijun and Wang, Bin and He, Conghui}, title = {MolRecBench-Wild: A Real-World Benchmark for Optical Chemical Structure Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1924-1933} }
Latent Domain Modeling Improves Robustness to Geographic Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Crasto_2026_CVPR, author = {Crasto, Ruth and Rolf, Esther}, title = {Latent Domain Modeling Improves Robustness to Geographic Shifts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2419-2428} }
MADrive: Memory-Augmented Driving Scene Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karpikova_2026_CVPR, author = {Karpikova, Polina and Selikhanovych, Daniil and Struminsky, Kirill and Musaev, Ruslan and Golitsyna, Maria and Baranchuk, Dmitry}, title = {MADrive: Memory-Augmented Driving Scene Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {54-65} }
INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Madinei_2026_CVPR, author = {Madinei, Parsa and Solgi, Ryan and Wen, Ziqi and Skaza, Jonathan and Eckstein, Miguel and Pedarsani, Ramtin}, title = {INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2947-2956} }
Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Domico_2026_CVPR, author = {Domico, Kyle and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Pauley, Eric and Hanna, Josiah and McDaniel, Patrick}, title = {Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {647-655} }
3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Beiyuan and Li, Hesong and Shao, Ruiwen and Fu, Ying}, title = {3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {306-315} }
Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yilin and Luo, Dawei and Chen, Shuai and Xu, Feng and Wang, Jiachi and Feng, Zunlei and Bei, Yijun}, title = {Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2314-2323} }
Enriching Knowledge Distillation with Cross-Modal Teacher Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mansourian_2026_CVPR, author = {Mansourian, Amir M. and Babaei, Amir Mohammad and Kasaei, Shohreh}, title = {Enriching Knowledge Distillation with Cross-Modal Teacher Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2893-2903} }
Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Fu, Zhouchao and Lu, Wenbin and Zheng, Junjie and Xu, Junnan and Liao, Junjie and Zheng, Jianwei}, title = {Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1546-1555} }
MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Zhenhan and Wang, Xuanhan and Jiang, Jiahao and Deng, Kaiyuan and Chen, Pengqi and Li, Shuangle and Liu, Chong and Xu, Xing and Song, Jingkuan and Gao, Lianli and Shen, Heng Tao}, title = {MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1535-1545} }
3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sautter_2026_CVPR, author = {Sautter, Tobias and Dihlmann, Jan-Niklas and Lensch, Hendrik P. A.}, title = {3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {528-537} }
GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Beibei and Cao, Xiao and Guo, Jingyuan and Tan, Robby T.}, title = {GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {275-284} }
Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Zhexi and Wang, Haoran and Yan, Xuerun and Lin, Weimeng and Zhang, Xianhong and Chen, Yongyu and Hu, Jia}, title = {Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {920-930} }
From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarabia_2026_CVPR, author = {Sarabia, Rafael Pablos and Nyborg, Joachim and Birk, Morten and Assent, Ira}, title = {From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2606-2617} }
Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2026_CVPR, author = {Ding, Yi and Yu, Yanzhao and Dai, Xili and Qi, Xianbiao and Sun, Peiwen and Wang, Xueqian and Yue, Xiangyu and Wang, Jianan}, title = {Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1346-1357} }
EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Lulin and Li, Dayou and Liang, Yiqing and Jiang, Sicong and Vijay, Hitesh and Hu, Hezhen and Xu, Xuhai and Liu, Zirui and Shakkottai, Srinivas and Li, Manling and Fan, Zhiwen}, title = {EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2017-2027} }
Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Pengfei and Luo, Sifu and Wu, Feng and Zhou, Fan and Zhong, Ting}, title = {Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1148-1158} }
VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Tempfli_2026_CVPR, author = {Tempfli, Levente and Rivera, Esteban and Lienkamp, Markus}, title = {VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {960-969} }
Event-Based Optical Flow Leveraging Precise Event Timing-
[pdf]
[supp]
[bibtex]@InProceedings{Greatorex_2026_CVPR, author = {Greatorex, Hugh and Chicca, Elisabetta}, title = {Event-Based Optical Flow Leveraging Precise Event Timing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3178-3188} }
Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Frahm_2026_CVPR, author = {Frahm, Noah and Patel, Prakrut and Zhang, Yue and Yu, Shoubin and Bansal, Mohit and Sengupta, Roni}, title = {Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3114-3123} }
MPM: Mutual Pair Merging for Efficient Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Rave_2026_CVPR, author = {Rav{\'e}, Simon and Rasti, Pejman and Rousseau, David}, title = {MPM: Mutual Pair Merging for Efficient Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2998-3008} }
Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiaying and Ye, Shuquan and Xu, Dan and Ouyang, Wanli and Lau, Rynson W. H.}, title = {Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1818-1827} }
AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning-
[pdf]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Qilin and Fan, Qilin and Li, Xinrui and Wang, Tianfu and Qiu, Shuting and Niu, Yue}, title = {AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3019-3028} }
InstructTable: Improving Table Structure Recognition Through Instruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Boming and Wang, Zining and Guo, Zhentao and Liu, Jianqiang and Duan, Chen and Gu, Yu and Zhou, Kai and Yan, Pengfei}, title = {InstructTable: Improving Table Structure Recognition Through Instruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2742-2752} }
AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jintao and Dong, Bowen and Shi, Weikang and Lei, Chenyang and Zhang, Suiyun and Liu, Rui and Liu, Xihui}, title = {AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1797-1807} }
LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Woo-Jin and Paek, Dong-Hee and Kong, Seung-Hyun}, title = {LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {889-899} }
DINO-VO: Learning Where to Focus for Enhanced State Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Qi and Li, Guanghao and Hu, Sijia and Gao, Xin and Ma, Junpeng and Xue, Xiangyang and Pu, Jian}, title = {DINO-VO: Learning Where to Focus for Enhanced State Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1556-1566} }
GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tanay_2026_CVPR, author = {Tanay, Thomas and Brahimi, Mohammed and Nazarczuk, Michal and Zhang, Qingwen and Catley-Chandar, Sibi and Moreau, Arthur and Zhang, Zhensong and P{\'e}rez-Pellitero, Eduardo}, title = {GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {348-359} }
The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Yadav_2026_CVPR, author = {Yadav, Garima Arya and Yilmaz, Nilay and Yang, Yezhou}, title = {The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2069-2078} }
SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Kang and Xiang, Wei and Yu, Lu and Wyatt, Mathew and Liu, Gaowen and Kompella, Ramana Rao}, title = {SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {12-21} }
SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Zhimin and Yadav, Abhay and Chellappa, Rama and Peng, Cheng}, title = {SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {253-263} }
Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tissera_2026_CVPR, author = {Tissera, Dumindu and Awadallah, Omar and Danish, Muhammad Umair and Sadhu, Ayan and Grolinger, Katarina}, title = {Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2429-2439} }
ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Junhao and Zeng, Deyu and Pang, Junhao and Li, Lini and Zhong, Xiaopin and Wu, Zongze}, title = {ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {295-305} }
UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vishal_2026_CVPR, author = {Vishal, Joseph Raj and Poluri, Nagasiri and Naik, Katha and Patil, Rutuja and Kota, Kashyap Hegde and Vinod, Krishna and Ramesh, Prithvi Jai and Farhadi, Mohammad and Yang, Yezhou and Chakravarthi, Bharatesh}, title = {UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1862-1871} }
MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhiyu and Zhou, Zewei and Cai, Tianhui and Zhang, Yun and Ma, Jiaqi}, title = {MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {878-888} }
MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zhilin and Zhang, Zhihui and Sun, Shiliang and Zhao, Jing and Yang, Hao}, title = {MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2089-2099} }
Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Wouladje_2026_CVPR, author = {Wouladje, Cabrel and Mumanikidzwa, Golden Tendekai and Islam, Md Apon and Xu, Huiying and Li, Hongbo and Tan, Wenzhe and Chen, Zhendong and Zhu, Xinzhong}, title = {Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1567-1576} }
Instant Colorization of Gaussian Splats-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lieber_2026_CVPR, author = {Lieber, Daniel and Mock, Alexander and Wandel, Nils}, title = {Instant Colorization of Gaussian Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {170-180} }
Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yibing and Katakol, Sudeep and Brack, Manuel and Lin, Jinhong and Bai, Haoyue and Li, Yu-Teng and Zhang, Richard and Shechtman, Eli and Ravi, Hareesh and Kale, Ajinkya}, title = {Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1883-1892} }
Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Huaiyuan and Yang, Muli and Goenawan, Gabriel James and Wang, Kai and Wang, Zheng and Hu, Peng and Peng, Xi and Zhu, Hongyuan}, title = {Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3029-3039} }
Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Zhenghui and Man, Yuanbin and Sheng, Junyuan and Lin, Bowen and Ahmed, Ahmed and Jiang, Bo and Zhang, Boyuan and Yin, Miao and Jin, Sian and Gnawali, Omprakash and Zhang, Chengming}, title = {Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3060-3069} }
LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yilong and Li, Wanhua and Chen, Zhu-Tian and Pfister, Hanspeter}, title = {LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {191-201} }
World Model Robustness via Surprise Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zollicoffer_2026_CVPR, author = {Zollicoffer, Geigh and Chopra, Tanush and Yan, Mingkuan and Ma, Xiaoxu and Eaton, Kenneth and Riedl, Mark}, title = {World Model Robustness via Surprise Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3146-3155} }
U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Smith_2026_CVPR, author = {Smith, Michael and Ferrie, Frank P.}, title = {U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1850-1861} }
Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Younggun and Swetha, Sirnam and Kagdi, Fazil and Shah, Mubarak}, title = {Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2100-2110} }
PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jinkai and Zhang, Zhenliang and Fan, Lifeng and Wang, Wei}, title = {PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1976-1985} }
LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chowdhury_2026_CVPR, author = {Chowdhury, Md Abtahi Majeed and Rahman, Md Rifat Ur and Taki, Akil Ahmad}, title = {LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1640-1649} }
Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haihao and Dong, Siwei and Li, Jianing and Zhao, Rui and Zhang, Yunjian and Qin, Geng and Zhu, Lin}, title = {Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3220-3230} }
SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krukowski_2026_CVPR, author = {Krukowski, Patryk and Gorczyca, Lukasz and Helm, Piotr and Ksiazek, Kamil and Spurek, Przemyslaw}, title = {SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2377-2386} }
MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinying and Jing, Junfeng and Wu, Tong and Gao, Tian and Sheng, Zhihong}, title = {MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2585-2595} }
NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Pengcheng and Hu, Yue and Li, Wenhao and Gunderson, Nicole M and Feng, Andrew and Sun, Zhenglong and Beerel, Peter and Seibel, Eric J}, title = {NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {403-413} }
FLToM: Robust Federated Learning with Theory-of-Mind Structure-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Tianshu and Yang, Liu and Guo, Sichang and Wang, Qilong and Hu, Qinghua}, title = {FLToM: Robust Federated Learning with Theory-of-Mind Structure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2503-2513} }
RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baltaxe_2026_CVPR, author = {Baltaxe, Michael and Levi, Dan and Benaim, Sagie}, title = {RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {558-568} }
G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Chao and Yokoya, Naoto}, title = {G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {518-527} }
Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singhi_2026_CVPR, author = {Singhi, Nishad and Bialas, Christian and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia and Rohrbach, Marcus and Rohrbach, Anna}, title = {Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3124-3135} }
Back

