Findings
- Back
Revisiting Real-Time Detection Transformer with Efficient Encoder Design-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiannan and Kane, Aditya and Zhou, Fengzhe and Wei, Yunchao and Shi, Humphrey},
  title     = {Revisiting Real-Time Detection Transformer with Efficient Encoder Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6859--6868}
}
Spatial Transcriptomics as Images for Large-Scale Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yishun and Qi, Jiaxin and Wang, Jian and Zheng, Yuhua and Huang, Jianqiang},
  title     = {Spatial Transcriptomics as Images for Large-Scale Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1191--1200}
}
GEAR: GEometry-Motion Alternating Refinement for Articulated Object Modeling with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jialin and Fu, Bin and Wang, Ruiping and Chen, Xilin},
  title     = {{GEAR}: {GEometry-Motion} Alternating Refinement for Articulated Object Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {264--274}
}
Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hoin and Lu, Shenyu and Wang, De and Wang, Xiaoqian},
  title     = {Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7956--7967}
}
Mix-to-Max: Optimizing Data Mixtures for Peak Vision-Language Efficiency-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Erwei and Zeng, Haijin and Xiao, Weiwei and Cao, Shijie and Shan, Qiben and Wu, Shaocong and Su, Jingyong and Liu, Jie},
  title     = {{Mix-to-Max}: Optimizing Data Mixtures for Peak Vision-Language Efficiency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2937--2946}
}
AdaPerceiver: Transformers with Adaptive Width, Depth, and Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jajal_2026_CVPR,
  author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K and Lu, Yung-Hsiang and Davis, James C.},
  title     = {{AdaPerceiver}: Transformers with Adaptive Width, Depth, and Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2618--2628}
}
Eigen-Value: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Youngjun and Kang, Joonseong and Lim, Sungjun and Song, Kyungwoo},
  title     = {{Eigen-Value}: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2387--2397}
}
CoTFly: Making UAVs Think Where to Fly Next Through Visual Chain-of-Thought Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meiqi and Xu, Longnyu and Liu, Jun and Li, Hewu and Qiu, Han},
  title     = {{CoTFly}: Making {UAVs} Think Where to Fly Next Through Visual Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1482--1491}
}
Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Das_Biswas_2026_CVPR,
  author    = {Das Biswas, Shristi and Roy, Arani and Roy, Kaushik},
  title     = {Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7903--7913}
}
AD-R1: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Tianyi and Tang, Tao and Gui, Xingtai and Li, Yongkang and Zheng, Jiasen and Huang, Weiyao and Kong, Lingdong and Han, Wencheng and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{AD-R1}: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1085--1095}
}
Towards Generalization of Scene Text Tampering Localization via Causal Invariance-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Huiru and Dong, Bin and Huang, Kaizhu and Huang, Xiaowei and Wang, Qiufeng},
  title     = {Towards Generalization of Scene Text Tampering Localization via Causal Invariance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7262--7271}
}
TriGuard-FL: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, K Naveen and Guizani, Mohsen},
  title     = {{TriGuard-FL}: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7945--7955}
}
CodePlot-CoT: Mathematical Visual Reasoning by Thinking with Code-Driven Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Chengqi and Sun, Kaiyue and Fang, Rongyao and Zhang, Manyuan and Feng, Yan and Luo, Ying and Liu, Yufang and Wang, Ke and Pei, Peng and Cai, Xunliang and Li, Hongsheng and Ma, Yi and Liu, Xihui},
  title     = {{CodePlot-CoT}: Mathematical Visual Reasoning by Thinking with Code-Driven Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9586--9596}
}
Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parmar_2026_CVPR,
  author    = {Parmar, Paritosh and Peh, Eric and Fernando, Basura},
  title     = {Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5860--5870}
}
What Matters for Scalable and Robust Learning in End-to-End Driving Planners?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Holtz_2026_CVPR,
  author    = {Holtz, David and Hanselmann, Niklas and Doll, Simon and Cordts, Marius and Schiele, Bernt},
  title     = {What Matters for Scalable and Robust Learning in End-to-End Driving Planners?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {931--941}
}
Optimal-Transport-based Feature Alignment for Multimodal Change Detection-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Mengqi and Liu, Jun and Cui, Li and Duan, Yuping and Wang, Faqiang},
  title     = {Optimal-Transport-based Feature Alignment for Multimodal Change Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6342--6351}
}
Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimbauer_2026_CVPR,
  author    = {Wimbauer, Felix and Manhardt, Fabian and Oechsle, Michael and Kalischek, Nikolai and Rupprecht, Christian and Cremers, Daniel and Tombari, Federico},
  title     = {Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4541--4551}
}
Active Exploration for Sparse Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Lidholm_2026_CVPR,
  author    = {Lidholm, Johanna and Dill{\'e}n, Ludvig and Kukelova, Zuzana and Sattler, Torsten and Larsson, Viktor},
  title     = {Active Exploration for Sparse Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {338--347}
}
IDSplat: Instance-Decomposed 3D Gaussian Splatting for Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Lindstrom_2026_CVPR,
  author    = {Lindstr{\"o}m, Carl and Rafidashti, Mahan and Fatemi, Maryam and Hammarstrand, Lars and Oswald, Martin R. and Svensson, Lennart},
  title     = {{IDSplat}: Instance-Decomposed {3D} {Gaussian} Splatting for Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {316--326}
}
MapGPT: A Vision-Language Model for Large-Scale High-Definition Map Generation-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mengxi and Zhou, Long and Li, Zhixia and Kwan, Adrian and Laprise, Denis and Huang, Hengyi and Wu, Xiaqing and Wu, Shuang},
  title     = {{MapGPT}: A Vision-Language Model for Large-Scale High-Definition Map Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {990--999}
}
SPOT: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schlesinger_2026_CVPR,
  author    = {Schlesinger, Oded and Farzam, Amirhossein and Di Martino, J. Matias and Sapiro, Guillermo},
  title     = {{SPOT}: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2335--2345}
}
CPUBone: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nottebaum_2026_CVPR,
  author    = {Nottebaum, Moritz and Dunnhofer, Matteo and Micheloni, Christian},
  title     = {{CPUBone}: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2493--2502}
}
Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Aimei and Cai, Yongxing and Liu, Bin and Sun, Jiale and Zhao, Guixin},
  title     = {Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5430--5439}
}
How to Achieve Prototypical Birth and Death for OOD Detection?-
[pdf]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ningkang and Yu, Qianfeng and Peng, Xiaoqian and Qian, Linjing and Liu, Yafei and Xiao, Canran and Lu, Xinyu and Lu, Tingyu and Zheng, Zhichao and Gu, Yanhui},
  title     = {How to Achieve Prototypical Birth and Death for {OOD} Detection?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6466--6475}
}
2D Triangle Splatting for Direct Differentiable Mesh Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Kaifeng and Zhou, Zheng and Peng, Yingliang and Wang, Qianwei},
  title     = {{2D} Triangle Splatting for Direct Differentiable Mesh Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {285--294}
}
TextBind: Your Vision-Language Models are Naturally Unified Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xu and Fu, Yun},
  title     = {{TextBind}: Your Vision-Language Models are Naturally Unified Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6029--6039}
}
DrawingVQA: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Yoonhwa and Fu, Junryu and Golparvar-Fard, Mani},
  title     = {{DrawingVQA}: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2121--2130}
}
Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather-
[pdf]
[supp]
[bibtex]@InProceedings{Jangamreddy_2026_CVPR,
  author    = {Jangamreddy, Nikhil Kumar and Baktashmotlagh, Mahsa and Arora, Chetan},
  title     = {Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7686--7696}
}
Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Yujun and Chang, Dongliang and Yin, Zijin and Liu, Xintong and Fang, Yuanchen and Ma, Zhanyu},
  title     = {Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6976--6986}
}
VirPro: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular 3D Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chupeng and Rao, Jiyong and Sun, Shangquan and Zhao, Runkai and Cai, Weidong},
  title     = {{VirPro}: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular {3D} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7530--7540}
}
Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Siqi and Gao, Zilve and Qiu, Haibo and Liu, Fanfan and Shi, Peng and Zeng, Zhixiong and Liao, Qingmin and Ma, Lin},
  title     = {Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9738--9748}
}
Decoupled Scale-wise Autoregressive Modeling for Visual Generation-
[pdf]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Sucheng and Yu, Yaodong and Ruiz, Nataniel and Wang, Feng and Xie, Cihang},
  title     = {Decoupled Scale-wise Autoregressive Modeling for Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4506--4515}
}
Debiased One-Shot NAS Via Density-Aware Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Roshtkhari_2026_CVPR,
  author    = {Roshtkhari, Mehraveh Javan and Toews, Matthew and Pedersoli, Marco},
  title     = {Debiased One-Shot {NAS} Via Density-Aware Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2357--2366}
}
Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Sun, Ming and Zhou, Chao and Zhu, Jihong},
  title     = {Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4821--4831}
}
Turning Generators into Retrievers: Unlocking MLLMs for Natural Language-Guided Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuqi and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan},
  title     = {Turning Generators into Retrievers: Unlocking {MLLMs} for Natural Language-Guided Geo-Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6331--6341}
}
SLAD: Shared LoRA Adapters for Task Specific Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Bensaid_2026_CVPR,
  author    = {Bensaid, Reda and Bendou, Yassir and Gripon, Vincent and Leduc-Primeau, Fran{\c{c}}ois},
  title     = {{SLAD}: Shared {LoRA} Adapters for Task Specific Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2968--2977}
}
QuPAINT: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Xuan Bac and Nguyen, Hoang-Quan and Pandey, Sankalp and Faltermeier, Tim and Borys, Nicholas and Churchill, Hugh and Luu, Khoa},
  title     = {{QuPAINT}: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8684--8694}
}
DocSLM: A Small Vision-Language Model for Long Multimodal Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hannan_2026_CVPR,
  author    = {Hannan, Tanveer and Mallios, Dimitrios and Pathak, Parth and Sardari, Faegheh and Seidl, Thomas and Bertasius, Gedas and Fayyaz, Mohsen and Sengupta, Sunando},
  title     = {{DocSLM}: A Small Vision-Language Model for Long Multimodal Document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9337--9346}
}
OnlineX: Unified Online 3D Reconstruction and Understanding with Active-to-Stable State Evolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Liu, Fangfu and Wang, Yule and Pang, Yize and Duan, Yueqi},
  title     = {{OnlineX}: Unified Online {3D} Reconstruction and Understanding with Active-to-Stable State Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {66--76}
}
Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Shizhao and Li, Jun and Li, Qiming},
  title     = {Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8143--8152}
}
Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yizhou and Yang, Dingkang and Chen, Zizhi and Han, Minghao and Zhang, Xukun and Liu, Keliang and Wei, Jingwei and Zhang, Lihua},
  title     = {Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8651--8660}
}
AndroidLong: LLM-based Android Agents Struggle with Long Looping Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinghan and Liu, Xiao and Xu, Yifan and Fu, Jiaqi and Huang, Jiayu and Liu, Yixuan and Dong, Yuxiao and Tang, Jie},
  title     = {{AndroidLong}: {LLM-based} {Android} Agents Struggle with Long Looping Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1700--1710}
}
Proto-SaGa: Prototype-based 3D Scene Segmentation with Semantic-aware Gaussian Grouping-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Youngmin and Oh, Changjae and Ham, Bumsub},
  title     = {{Proto-SaGa}: Prototype-based {3D} Scene Segmentation with Semantic-aware {Gaussian} Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7493--7502}
}
Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jo_2026_CVPR,
  author    = {Jo, Yujin and Bae, Sangyoon and Kim, Taesup},
  title     = {Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9706--9715}
}
OmniMotion-X: Versatile Multimodal Whole-Body Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guowei and Bian, Yuxuan and Zeng, Ailing and Chen, Zhuo and Shi, Mingyi and Huang, Shaoli and Li, Wen and Duan, Lixin and Xu, Qiang},
  title     = {{OmniMotion-X}: Versatile Multimodal Whole-Body Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3641--3652}
}
It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeha and Lee, Jin Won and You, Siwoo and Lee, Jangho},
  title     = {It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9500--9509}
}
CTFS: Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ping and Li, Chengzhou and Meng, Guanchen and Jia, Qi and Liu, Jinyuan and Liu, Zhu and Liu, Yu and Luo, Zhongxuan and Fan, Xin},
  title     = {{CTFS}: Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1660--1669}
}
Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Shan and Yang, Zhao and Yan, Tianwei and Gong, Yusong and Wan, Qian and Chen, Shizhao and Song, Shezheng and Wang, Chengyu and Wang, Meng},
  title     = {Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2304--2313}
}
STS-Mixer: Spatio-Temporal-Spectral Mixer for 4D Point Cloud Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Jiang, Xueying and Zhang, Gongjie and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{STS-Mixer}: Spatio-Temporal-Spectral Mixer for {4D} Point Cloud Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8185--8194}
}
Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xuhai and Cen, Zhi and Pi, Huaijin and Peng, Sida and Zhou, Xiaowei and Liu, Yong},
  title     = {Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3598--3608}
}
VACoT: Rethinking Visual Data Augmentation with VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengzhuo and Sun, Chong and Du, SiNan and Li, Chen and Lyu, Jing and Yuan, Chun},
  title     = {{VACoT}: Rethinking Visual Data Augmentation with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9780--9790}
}
KnowMTP: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Rufan and Xue, Tianyi and Zhou, Tiantian and Wu, Weiwei and Li, Changle and Lu, Yuhuan},
  title     = {{KnowMTP}: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {980--989}
}
Block Cascading: Training Free Acceleration of Block-Causal Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2026_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Pinnaparaju, Nikhil and Entezari, Rahim and Scott, Jim and Song, Yi-Zhe and Jampani, Varun},
  title     = {Block Cascading: Training Free Acceleration of Block-Causal Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4078--4088}
}
GRADE: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Zehong and Liu, Zhiyuan and Wang, Yuning and Li, Jinhao and Jiang, Junkai and Jiang, Yanbo and Xu, Zhenhua and Wang, Jianqiang},
  title     = {{GRADE}: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1029--1038}
}
Group Relative Attention Guidance for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuanpu and Niu, Xuesong and Chen, Ruidong and Song, Dan and Zeng, Jianhao and Du, Penghui and Cao, Haoxiang and Wu, Kai and Liu, An-an},
  title     = {Group Relative Attention Guidance for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3840--3850}
}
Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhou, Honglu and Wang, Shijie and Li, Junnan and Xiong, Caiming and Savarese, Silvio and Bansal, Mohit and Ryoo, Michael S. and Niebles, Juan Carlos},
  title     = {Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9088--9099}
}
GeneFlow: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yihang and Hou, Xianxu and Shen, Linlin},
  title     = {{GeneFlow}: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3688--3697}
}
EIRES: Training-free AI-Generated Image Detection via Edit-Induced Reconstruction Error Shift-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Wan and Yan, Jing and Chen, Xiaojing and Shen, Ling and Lin, Chenhao and Diao, Yunfeng and Hong, Richang},
  title     = {{EIRES}: Training-free {AI-Generated} Image Detection via Edit-Induced Reconstruction Error Shift},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6486--6495}
}
A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Duo and Yang, Zuhao and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2823--2833}
}
BrandFusion: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zihao and Wang, Ruotong and Lyu, Siwei and Zhang, Min and Wu, Baoyuan},
  title     = {{BrandFusion}: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8661--8671}
}
PDF-GS: Progressive Distractor Filtering for Robust 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Kangmin and Lee, MinKyu and Kim, Tae-Young and Lee, ByeongCheol and An, JoonSeoung and Heo, Jae-Pil},
  title     = {{PDF-GS}: Progressive Distractor Filtering for Robust {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {468--477}
}
Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bohan and Tang, Weidong and Chi, Zhixiang and Jin, Yi and Li, Zhenbo and Wang, Yang and Wu, Yanan},
  title     = {Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7830--7840}
}
Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Keli and Qian, Yuntao},
  title     = {Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4940--4949}
}
InternVL-X: Advancing and Accelerating InternVL Series with Efficient Visual Token Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Dongchen and Zhang, Zilu and Huang, Leping and Sun, Yuyao and Zeng, Jianliang and Shu, Mao and Cao, Huo}, title = {InternVL-X: Advancing and Accelerating InternVL Series with Efficient Visual Token Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5765-5775} }
FineCog-Nav: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal UAV Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Dian and Xu, Zhengzheng and Wang, Peiyang and Liu, Like and Wang, Yule and Shi, Jieqi and Huo, Jing}, title = {FineCog-Nav: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal UAV Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1325-1334} }
Earthquake-Bench: Video Generation Benchmark for Earthquake Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Lei and Chen, Hao and Chen, Yuyan and Wu, Kui and Chen, Lijia and Zhong, Fangwei and Huang, Feiran and Song, Bo and Yang, Han}, title = {Earthquake-Bench: Video Generation Benchmark for Earthquake Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4055-4066} }
Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Yu and Sun, Sheng and Cheng, Shengjia and Liu, Teli and Li, Mingfeng and Liu, Min}, title = {Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {837-846} }
Quantifying the Gap between Understanding and Generation within Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenlong and Chen, Yuhang and Hu, Zhihan and Chen, Dongping and Chen, Wenhu and Wiegreffe, Sarah and Zhou, Tianyi}, title = {Quantifying the Gap between Understanding and Generation within Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5936-5946} }
Plug-and-Think: Structured Reasoning for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Kaikai and Wen, Di and Li, Xinhai and Xiang, Senwei}, title = {Plug-and-Think: Structured Reasoning for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3136-3145} }
Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Chenxi and Zhou, Yan and Yang, Jufeng}, title = {Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8897-8906} }
Leveraging Arbitrary Data Sources for AI-Generated Image Detection Without Sacrificing Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Qinghui and Zhang, Haifeng and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Xiao, Bin}, title = {Leveraging Arbitrary Data Sources for AI-Generated Image Detection Without Sacrificing Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6748-6757} }
ODOV: Benchmark the Open-Domain Open-Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yupeng and Han, Ruize and Zhou, Fangnan and Feng, Wei and Wan, Liang}, title = {ODOV: Benchmark the Open-Domain Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6643-6654} }
Latent-Compressed Variational Autoencoder for Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Jiarui and Zhao, Wenshuai and Zou, Zhengtao and Kannala, Juho and Solin, Arno}, title = {Latent-Compressed Variational Autoencoder for Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3873-3883} }
CaptAin: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Chuanshen and Zhou, Kai and Wang, Feiqi and Ning, Yutao and Xiong, Zhendong and Li, Yirui and Wen, Zhiquan and Tan, Mingkui}, title = {CaptAin: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6208-6217} }
Step-CoT: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Lin and Ou, Yafei and Deng, Zhipeng and Dai, Pengyu and Hou, Chongxian and Yan, Jiale and Li, Yaqian and Long, Kaiwen and Gong, Xun and Ikebe, Masayuki and Zheng, Yefeng}, title = {Step-CoT: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2049-2059} }
NRFP: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Huanxin and Wu, Zhize and Jiang, Yue and Zhou, Jijian and Xu, Zhiwei and Li, Teng and Shu, Jianhua and Cheng, Fan}, title = {NRFP: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7562-7572} }
Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akbarian_2026_CVPR, author = {Akbarian, Fatemeh and Baninajjar, Anahita and Zhang, Yingyi and Balashankar, Ananth and Aminifar, Amir}, title = {Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {748-757} }
LTGS: Long-Term Gaussian Scene Chronology From Sparse View Updates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minkwan and Lee, Seungmin and Kim, Junho and Kim, Young Min}, title = {LTGS: Long-Term Gaussian Scene Chronology From Sparse View Updates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {488-497} }
From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness-
[pdf]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Chenlin and Gong, Ao and Ling, Xingtao and Zhu, Yingying}, title = {From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7101-7110} }
VideoMatGen: PBR Materials through Joint Generative Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hasselgren_2026_CVPR, author = {Hasselgren, Jon and Hasan, Milos and Zeng, Zheng and Munkberg, Jacob}, title = {VideoMatGen: PBR Materials through Joint Generative Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2440-2450} }
ReFoCUS: Reinforcement-guided Frame Optimization for Contextual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Hosu and Kim, Junho and Kim, Hyunjun and Ro, Yong Man}, title = {ReFoCUS: Reinforcement-guided Frame Optimization for Contextual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8291-8302} }
Environmental Understanding Vision-language Model for Embodied Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2026_CVPR, author = {Bang, Jinsik and Bae, Jaeyeon and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan}, title = {Environmental Understanding Vision-language Model for Embodied Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3092-3102} }
Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jungkon and Jung, Cheolseung and Choi, Jong-Min and Lee, Juseong}, title = {Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {847-856} }
VSAS-Bench: Real-Time Evaluation of Visual Streaming Assistant Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasu_2026_CVPR, author = {Vasu, Pavan Kumar Anasosalu and Koc, Cem and Faghri, Fartash and Li, Chun-Liang and Feng, Bo and Lai, Zhengfeng and Cao, Meng and Tuzel, Oncel and Pouransari, Hadi}, title = {VSAS-Bench: Real-Time Evaluation of Visual Streaming Assistant Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9654-9663} }
Learning Vision-Language-Action World Models for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Guoqing and Tang, Pin and Ren, Xiangxuan and Zhao, Guodongfang and Feng, Bailan and Ma, Chao}, title = {Learning Vision-Language-Action World Models for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1073-1084} }
Switch-KD: Visual-Switch Knowledge Distillation for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Haoyi and Wang, Xiaoxiao and Mao, Ning and Wang, Qian and Mu, Lifu and Zheng, Wen and Wei, Tao and Chen, Wei}, title = {Switch-KD: Visual-Switch Knowledge Distillation for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9316-9326} }
DeepSketcher: Internalizing Visual Manipulation for Multimodal Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Zeng, Zhixiong and Ma, Lin and Zhang, Jing}, title = {DeepSketcher: Internalizing Visual Manipulation for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9389-9400} }
FLAIR: Frequency- and Locality-Aware Implicit Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Ko_2026_CVPR, author = {Ko, Sukhun and Youn, Seokhyun and Kye, Dahyeon and Min, Kyle and Eom, Chanho and Oh, Jihyong}, title = {FLAIR: Frequency- and Locality-Aware Implicit Neural Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4877-4887} }
Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Sanghoon and Lee, Geon and Park, Hyekang and Ham, Bumsub}, title = {Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6819-6828} }
Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Runqi and Wang, Chuming and Yi, Fangqiu and Zhao, Yuying and Xu, Jingyu and Dai, Yuhang and Wang, Zheng and Zhang, Chi}, title = {Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3903-3913} }
Exploring the best way for UAV visual localization under Low-altitude Multi-view Observation Condition: a Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yibin and Teng, Xichao and Chen, Shuo and Liu, Leqi and Wang, Kun and Song, Xiaokai and Li, Zhang}, title = {Exploring the best way for UAV visual localization under Low-altitude Multi-view Observation Condition: a Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1731-1741} }
Stochastic Perturbations Improve Distribution-to-Distribution Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Shiye and Zhang, Yuhui and Zhou, Linqi and Ranganath, Rajesh and Yeung-Levy, Serena}, title = {Stochastic Perturbations Improve Distribution-to-Distribution Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3965-3974} }
Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Hanzhong and Yu, Yizhou}, title = {Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3799-3808} }
BMD-45: A Large-Scale CCTV Vehicle Detection Dataset for Urban Traffic in Developing Cities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2026_CVPR, author = {Sharma, Akash and Mhatre, Chinmay and Gawali, Sankalp and Bokkasam, Ruthvik and Sharma, Brij and Pattanaik, Vishwajeet and Rathore, Punit and Krishnapuram, Raghu and Kovvali, Vijay Gopal and Chakraborty, Anirban and Simmhan, Yogesh}, title = {BMD-45: A Large-Scale CCTV Vehicle Detection Dataset for Urban Traffic in Developing Cities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2240-2249} }
Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xuekang and Zhou, Ji-Zhe and Feng, Kaiwen and Qu, Chenfan and Wang, Xiwen and Wang, Yunfei and Zhou, Liting and Liu, Jian}, title = {Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7198-7207} }
Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenhu and Wu, Yiming and Wang, Huanyu and Liu, YaoYang and Dou, Huanzhang and Yang, Senqiao and Wu, Sitong and Zhao, Hanbin and Jia, Jiaya}, title = {Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2852-2862} }
MIRA: Multimodal Iterative Reasoning Agent for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Ziyun and Hua, Hang and Luo, Jiebo}, title = {MIRA: Multimodal Iterative Reasoning Agent for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9563-9573} }
SAGE: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thai_2026_CVPR, author = {Thai, Gia Huy and Vu, Hoang-Nguyen and Phan, Anh-Minh and Ly, Quang-Thinh and Nguyen, Thi-Ngoc-Truc and Ho, Nhat}, title = {SAGE: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7337-7346} }
HyperFM: A Efficient Hyperspectral Foundation Model with Spectral Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tushar_2026_CVPR, author = {Tushar, Zahid Hassan and Purushotham, Sanjay}, title = {HyperFM: A Efficient Hyperspectral Foundation Model with Spectral Grouping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6955-6965} }
Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yanyun and Ye, Qingqing and Liu, Li and Liang, Zi and Hu, Haibo}, title = {Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {768-778} }
DSAA: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Donghong and Lin, Endian and Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Yang, Zhao and Zhu, Chuang}, title = {DSAA: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6538-6547} }
Mitigating Object Hallucinations in LVLMs via Attention Imbalance Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Han and Li, Qin and Wang, Peixin and Zhang, Min}, title = {Mitigating Object Hallucinations in LVLMs via Attention Imbalance Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8930-8940} }
Memorization in 3D Shape Generation: An Empirical Study-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pu_2026_CVPR, author = {Pu, Shu and Zeng, Boya and Zhou, Kaichen and Wang, Mengyu and Liu, Zhuang}, title = {Memorization in 3D Shape Generation: An Empirical Study}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1828-1838} }
FusionBridge: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yali and Han, Qianru and He, Xinwei and Liu, Zhi and Xiang, Jinhai}, title = {FusionBridge: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5735-5744} }
Hold-One-Shot-Out (HOSO) for Validation-Free Few-Shot CLIP Adapters-
[pdf]
[supp]
[bibtex]@InProceedings{Vorster_2026_CVPR, author = {Vorster, Chris and Maniparambil, Mayug and O'Connor, Noel and Murphy, Noel and Molloy, Derek}, title = {Hold-One-Shot-Out (HOSO) for Validation-Free Few-Shot CLIP Adapters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7820-7829} }
Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Shijie and Wu, Changti and Yang, Laurence Tianruo and Yuan, Hang and Yu, Bin and Zhang, Lei and Chen, Kai}, title = {Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9824-9835} }
Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parolari_2026_CVPR, author = {Parolari, Luca and Faccioli, Nicla and Ballan, Lamberto}, title = {Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1742-1751} }
LWTformer: A Detail-Aware, Learnable Wavelet-Transformer for Ancient Chinese Character Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2026_CVPR, author = {Ruan, Wentao and Li, Xinhui and Cheng, Zhan and Fan, Cunhang and Tian, Libao and Lv, Zhao}, title = {LWTformer: A Detail-Aware, Learnable Wavelet-Transformer for Ancient Chinese Character Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4971-4981} }
OmniInsert: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jinshu and Li, Xinghui and Bai, Xu and Ma, Tianxiang and Zhang, Pengze and Li, Mengtian and Chen, Zhuowei and Li, Gen and Liu, Lijie and Zhao, Songtao and Li, Bingchuan and He, Qian}, title = {OmniInsert: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4067-4077} }
DaMN: Deleting and Migrating Normalization Layers from Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Ryabykin_2026_CVPR, author = {Ryabykin, Alexey and Zhelavskaya, Irina and Shvetsov, Egor and Rukhovich, Alexey and Okhotnikov, Nikita and Khrapov, Artem and Burnaev, Evgeny and Kryzhanovskiy, Vladimir Mikhailovich}, title = {DaMN: Deleting and Migrating Normalization Layers from Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2883-2892} }
PASR: Pose-Aware 3D Shape Retrieval from Occluded Single Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Jiaxin and Zhang, Guofeng and Ma, Wufei and Liang, Naifu and Kortylewski, Adam and Yuille, Alan}, title = {PASR: Pose-Aware 3D Shape Retrieval from Occluded Single Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6869-6879} }
MHMamba: Multi-Head Mamba for 3D Brain Tumor Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Hanjun and Wang, Hua and Zhang, Fan}, title = {MHMamba: Multi-Head Mamba for 3D Brain Tumor Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7388-7397} }
HoliSafe: Holistic Safety Benchmarking and Modeling for Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Youngwan and Kim, Kangsan and Park, Kwanyong and Jung, Ilchae and Jang, Soojin and Lee, Seanie and Lee, Yong-Ju and Hwang, Sung Ju}, title = {HoliSafe: Holistic Safety Benchmarking and Modeling for Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5989-5998} }
Do LLMs and VLMs Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer-
[pdf]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Chenhang and Zhang, An and Chen, Yuxin and Deng, Gelei and Zheng, Jingnan and Liang, Zhenkai and Wang, Xiang and Chua, Tat-Seng}, title = {Do LLMs and VLMs Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2346-2356} }
OV-Stitcher: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2026_CVPR, author = {Moon, Seungjae and Oh, Seunghyun and Ro, Youngmin}, title = {OV-Stitcher: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7357-7367} }
BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Mincheol and Lim, HyunJin and Kang, Bomin and Park, Daehee}, title = {BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3470-3480} }
TP^2-DETR: Unlocking Deformable DETR for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Ya-Yun and Tippayamontri, Kan and Yang, Chih-Yuan and Hsu, Jane Yung-jen}, title = {TP$^{2}$-DETR: Unlocking Deformable DETR for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8237-8246} }
ForestPrune: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ju_2026_CVPR, author = {Ju, Shaobo and Song, Baiyang and Chen, Tao and Zhang, Jiapeng and Wu, Qiong and Chang, Chao and Wang, Huaixi and Zhou, Yiyi and Ji, Rongrong}, title = {ForestPrune: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8326-8336} }
Re-Depth Anything: Test-Time Depth Refinement via Self-Supervised Re-lighting-
[pdf]
[supp]
[bibtex]@InProceedings{Bhattarai_2026_CVPR, author = {Bhattarai, Ananta R. and Rhodin, Helge}, title = {Re-Depth Anything: Test-Time Depth Refinement via Self-Supervised Re-lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {602-612} }
TransKV: A Data-Driven Pruning Method for Large Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Guangning and Meng, Fanxu and Zhou, Ruijie and Ng, Michael K. and Pei, Wenjie and Zhang, Muhan}, title = {TransKV: A Data-Driven Pruning Method for Large Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2451-2461} }
CarePilot: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosh_2026_CVPR, author = {Ghosh, Akash and Ashraf, Tajamul and Singh, Rishu Kumar and Saeed, Numan and Saha, Sriparna and Chen, Xiuying and Khan, Salman}, title = {CarePilot: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9695-9705} }
Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yang and Li, Miaoge and Rao, Zhijie and Zeng, Deze and Guo, Song and Guo, Jingcai}, title = {Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3374-3383} }
InstaDA: Augmenting Instance Segmentation Data with Dual-Agent System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Xianbao and He, Yonghao and Boukhers, Zeyd and See, John and Su, Hu and Sui, Wei and Yang, Cong}, title = {InstaDA: Augmenting Instance Segmentation Data with Dual-Agent System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4300-4309} }
MaMe: Matrix-Based Token Merging-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2026_CVPR, author = {Huo, Simin and Li, Ning}, title = {MaMe: Matrix-Based Token Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2863-2872} }
BadRSSD: Backdoor Attacks on Regularized Self-Supervised Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiayao and Zhang, Yiping and Hasan, Mohammad Maruf and Lei, Xiaoying and Zhang, Jiale and Zhu, Junwu and Wu, Qilin and Zhao, Dongfang}, title = {BadRSSD: Backdoor Attacks on Regularized Self-Supervised Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {705-715} }
Splatwizard: A Benchmark Toolkit for 3D Gaussian Splatting Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiang and Zhou, Yimin and Wang, Jinxiang and Huang, Yujun and Xie, Shuzhao and Qin, Shiyu and Hong, Mingyao and Li, Jiawei and Wang, Yaowei and Wang, Zhi and Xia, Shu-Tao and Chen, Bin}, title = {Splatwizard: A Benchmark Toolkit for 3D Gaussian Splatting Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2261-2271} }
CP-IMoE: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jing and Zhang, Dongbo and Zheng, Yalin and Meng, Yanda}, title = {CP-IMoE: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6090-6099} }
CurrMix: Curriculum-Enhanced MixUp for Long-Tailed Visual Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Jian_2026_CVPR, author = {Jian, Zhongquan and Chen, Yanhao and Hu, Bingbing and Lv, Wenhan and Wang, Shaopan and Wu, Jipeng and Yao, Junfeng and Lu, Yang and Wu, Qingqiang}, title = {CurrMix: Curriculum-Enhanced MixUp for Long-Tailed Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7706-7716} }
B-MoE: A Body-Part-Aware Mixture-of-Experts "All Parts Matter" Approach to Micro-Action Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Poddar_2026_CVPR, author = {Poddar, Nishit and Reka, Aglind and Borza, Diana-Laura and Majhi, Snehashis and Balazia, Michal and Das, Abhijit and Br{\'e}mond, Fran{\c{c}}ois}, title = {B-MoE: A Body-Part-Aware Mixture-of-Experts ``All Parts Matter'' Approach to Micro-Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3364-3373} }
See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Le Thien Phuc and Yu, Zhuoran and Hang, Samuel Low Yu and An, Subin and Lee, Jeongik and Ban, Yohan and Chung, SeungEun and Nguyen, Thanh-Huy and Maeng, JuWan and Lee, Soochahn and Lee, Yong Jae}, title = {See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2272-2283} }
Object-Centric Vision Token Pruning for Vision Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Guangyuan and Zhao, Rongzhen and Deng, Jinhong and Wang, Yanbo and Pajarinen, Joni}, title = {Object-Centric Vision Token Pruning for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7040-7049} }
M^4Fuse: Lightweight State-Space MoE with a Cross-Scale Gating Bridge for Brain Tumor Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Meihua and Tong, Xinyu and Yang, Li}, title = {M{\textasciicircum}4Fuse: Lightweight State-Space MoE with a Cross-Scale Gating Bridge for Brain Tumor Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5368-5377} }
Understanding Reward Hacking in Text-to-Image Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Yunqi and Kao, Kuei-Chun and Zhou, Hengguang and Hsieh, Cho-Jui}, title = {Understanding Reward Hacking in Text-to-Image Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4245-4255} }
SpHOR: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Bahavan_2026_CVPR, author = {Bahavan, Thiru Thillai Nadarasar and Seneviratne, Sachith and Halgamuge, Saman}, title = {SpHOR: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6901-6910} }
POS-ISP: Pipeline Optimization at the Sequence Level for Task-aware ISP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Won_2026_CVPR, author = {Won, Jiyun and Yang, Heemin and Kim, Woohyeok and Ok, Jungseul and Cho, Sunghyun}, title = {POS-ISP: Pipeline Optimization at the Sequence Level for Task-aware ISP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4931-4939} }
RACE-6D: Real-time Accurate Coarse-to-finE Object 6D Pose Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Ha_2026_CVPR, author = {Ha, Yoonwoo and Moon, Hyungpil}, title = {RACE-6D: Real-time Accurate Coarse-to-finE Object 6D Pose Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1525-1534} }
GM-Skip: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Lianming and Hu, Haibo and Li, Qiao and He, Xin and Guan, Nan and Xue, Chun Jason}, title = {GM-Skip: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2834-2843} }
coDrawAgents: A Multi-Agent Dialogue Framework for Compositional Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chunhan and Wu, Qifeng and Pan, Jia-Hui and Hui, Ka-Hei and Hu, Jingyu and Jiang, Yuming and Sheng, Bin and Liu, Xihui and Gong, Wenjuan and Liu, Zhengzhe}, title = {coDrawAgents: A Multi-Agent Dialogue Framework for Compositional Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9802-9812} }
Res2SPDNet: Multi-Granularity SPD Matrix Residual Learning for Signal Classification-
[pdf]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Shenghui and Wang, Rui and Xu, Tianyang and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef}, title = {Res2SPDNet: Multi-Granularity SPD Matrix Residual Learning for Signal Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2639-2648} }
Memorization In Stable Diffusion Is Unexpectedly Driven by CLIP Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Bumjun and No, Albert}, title = {Memorization In Stable Diffusion Is Unexpectedly Driven by CLIP Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7988-7997} }
Recursive Think-Answer Process for LLMs and VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Byung-Kwan and Chee, Youngchae and Ro, Yong Man}, title = {Recursive Think-Answer Process for LLMs and VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9608-9621} }
DUALVISION: RGB-Infrared Multimodal Large Language Models for Robust Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majeedi_2026_CVPR, author = {Majeedi, Abrar and Ruan, Zhiyuan and Zhao, Ziyi and Wang, Hongcheng and Lu, Jianglin and Li, Yin}, title = {DUALVISION: RGB-Infrared Multimodal Large Language Models for Robust Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5786-5795} }
What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shaobo and Xiong, Haobo and Liu, Kai and Lin, Yuna}, title = {What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2813-2822} }
FedOrtho: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Qinghui and Yang, Xue and Chen, Xunlei and Lai, Jinshan and Meng, Hua and Tang, Xiaohu}, title = {FedOrtho: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8009-8018} }
S3O: Selective Spatial-Spectral Operator for Cross-Scale Fusion-
[pdf]
[bibtex]@InProceedings{Pei_2026_CVPR, author = {Pei, Jieyuan and Li, Wei and Li, Zhuoxuan and Zhu, Junwei and Lu, Meiyi and Jiang, Jiawei and Wang, Chenyu and Zheng, Jianwei}, title = {S3O: Selective Spatial-Spectral Operator for Cross-Scale Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6281-6290} }
HiStream: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Haonan and Liu, Shikun and Zhou, Zijian and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Schult, Jonas and He, Sen and Chen, Shoufa and Cong, Yuren and Xiang, Tao and Liu, Ziwei and Perez-Rua, Juan-Manuel}, title = {HiStream: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4603-4613} }
EpiMask: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deshmukh_2026_CVPR, author = {Deshmukh, Rahul and Chauhan, Aditya and Kak, Avinash}, title = {EpiMask: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6271-6280} }
AdaGaR: Adaptive Gabor Representation for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chan_2026_CVPR, author = {Chan, Jiewen and Zhao, Zhenjun and Liu, Yu-Lun}, title = {AdaGaR: Adaptive Gabor Representation for Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4276-4289} }
VoQA: Visual-only Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Jianing and Jiang, Luyang and Luo, Jie and Wu, Wenjun and Huang, Lei}, title = {VoQA: Visual-only Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9100-9109} }
IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zichen and Sun, Yuheng and Zhu, Mingxuan and Ma, Wenjie and Zhang, Situo and Wang, Zhexiang and Yang, Ziyue and Zhang, Danyang and Lan, Kunyao and Zhao, Zihan and Liu, Dingye and Xiang, Siqi and Chen, Lu and Yu, Kai}, title = {IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8672-8683} }
RoboScape-R: Unified Reward-Observation World Models for Generalizable Robotics Training via RL-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yinzhou and Shang, Yu and Chen, Yinuo and Wei, Bingwen and Zhang, Xin and Yu, Shu'ang and Shi, Liangzhi and Yu, Chao and Gao, Chen and Wu, Wei and Li, Yong}, title = {RoboScape-R: Unified Reward-Observation World Models for Generalizable Robotics Training via RL}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1472-1481} }
MeMix: Multi-Encoder Mixture Framework for Medical Report Generation-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yiming and Cui, Lizhen and Shen, Zhiqi}, title = {MeMix: Multi-Encoder Mixture Framework for Medical Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5474-5483} }
PaM-MIL: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Pengyu and Wang, Jiachuan and CHEN, Zhao and Cao, Caleb Chen and Wang, Liping and Jiang, Tingyi and Chen, Lei}, title = {PaM-MIL: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5305-5314} }
IRL-VLA: Vision-Language-Action Training via Reward World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Anqing and Yu, Gao and Yuwen, Heng and Wang, Yiru and Shuo, Wang and Hao, Jiang and Hao, Sun}, title = {IRL-VLA: Vision-Language-Action Training via Reward World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {970-979} }
SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guimard_2026_CVPR, author = {Guimard, Quentin and Bartsch, Federico and Caldarella, Simone and Aljundi, Rahaf and Ricci, Elisa and Mancini, Massimiliano}, title = {SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8101-8110} }
Softmax-GS: Generalized Gaussians Learning When to Blend or Bound-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ziwen_2026_CVPR, author = {Ziwen, Chen and Wang, Peng and Tan, Hao and Xu, Zexiang and Fuxin, Li}, title = {Softmax-GS: Generalized Gaussians Learning When to Blend or Bound}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {508-517} }
On the Feasibility and Opportunity of Autoregressive 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zanming and Yoo, Jinsu and Jeon, Sooyoung and Liu, Zhenzhen and Campbell, Mark and Weinberger, Kilian Q and Hariharan, Bharath and Chao, Wei-Lun and Luo, Katie Z}, title = {On the Feasibility and Opportunity of Autoregressive 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1170-1179} }
LUMINA: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiazhou and Liu, Zhongyi and Shi, Ying and Zhao, Zhichun and Wang, Zhuoyu and Zhou, Yuhang and Hu, Huanling and Ye, Guangnan and Li, Mengtian and Guo, Lei}, title = {LUMINA: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1629-1639} }
Indexing Multimodal Language Models for Large-scale Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Tharwat_2026_CVPR, author = {Tharwat, Bahey and Kordopatis-Zilos, Giorgos and Suma, Pavel and Reid, Ian and Tolias, Giorgos}, title = {Indexing Multimodal Language Models for Large-scale Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6737-6747} }
MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiyang and Li, Zongxia and Jin, Jihui and KV, Gouthaman and Raj, Vishnu and Sinha, Nilotpal and Chen, Jingxi and Du, Fan and Manocha, Dinesh}, title = {MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9433-9443} }
MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Zhipeng and Danier, Duolikun and Lenssen, Jan Eric and Bilen, Hakan}, title = {MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7304-7314} }
VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Shiji and Xiong, Shukun and Huang, Yao and Yan, Jin and Wu, Zhenyu and Guan, Jiyang and Duan, Ranjie and Tao, Jialing and Xue, Hui and Wei, Xingxing}, title = {VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9412-9421} }
M-PhyGs: Multi-Material Object Dynamics from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wada_2026_CVPR, author = {Wada, Norika and Yamashita, Kohei and Kawahara, Ryo and Nishino, Ko}, title = {M-PhyGs: Multi-Material Object Dynamics from Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6404-6413} }
A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stoica_2026_CVPR, author = {Stoica, Maria and Hekal, Abdelrahman and Lomuscio, Alessio}, title = {A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {685-694} }
TokenErase: Robust Concept Erasure via Visual-Injected Token Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Liangshun and Ni, Zhangkai and Wang, Hanli}, title = {TokenErase: Robust Concept Erasure via Visual-Injected Token Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4129-4139} }
Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Belhe_2026_CVPR, author = {Belhe, Yash and Chang, Wesley and Li, Tzu-Mao and Ramamoorthi, Ravi and Gharbi, Micha{\"e}l}, title = {Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4089-4096} }
Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhe and Wen, Hao and Hao, Aiming and Song, Bingze and Wu, Meiqi and Wu, Jiahong and Chu, Xiangxiang and Lu, Sheng and Wang, Haoqian}, title = {Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8153-8163} }
Learning to Translate Noise for Robust Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ha_2026_CVPR, author = {Ha, Inju and Ryou, Donghun and Seo, Seonguk and Han, Bohyung}, title = {Learning to Translate Noise for Robust Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5034-5043} }
UGLMM: Towards Unified Vision Grounding with Large Multimodal Model-
[pdf]
[supp]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Xiangheng and Zhou, Li and Sun, Zenghui and Dong, Shichao and Sang, Nong and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Gao, Changxin and Zhang, Kaifu}, title = {UGLMM: Towards Unified Vision Grounding with Large Multimodal Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5683-5693} }
MFI-ResNet: Efficient ResNet Architecture Optimization via MeanFlow Compression and Selective Incubation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Nuolin and Wang, Linyuan and Wei, Haonan and Li, Lei and Yan, Bin}, title = {MFI-ResNet: Efficient ResNet Architecture Optimization via MeanFlow Compression and Selective Incubation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2691-2699} }
VideoScaffold: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Naishan and Guo, Qingpei and Huang, Jie and Zhao, Feng}, title = {VideoScaffold: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5947-5957} }
Speed3R: Sparse Feed-forward 3D Reconstruction Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Weining and Tan, Xiao and Han, Kai}, title = {Speed3R: Sparse Feed-forward 3D Reconstruction Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {119-128} }
Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration-
[pdf]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Bowen and Wang, Tao and Zhang, Miao and Yu, Xin and Chen, Jinwei and Li, Bo and Zhang, Kaihao}, title = {Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8175-8184} }
FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jintong and Zhao, He and Yang, Yibo and Guo, Dandan}, title = {FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7737-7746} }
Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization PatchGAN-
[pdf]
[supp]
[bibtex]@InProceedings{Zaghetto_2026_CVPR, author = {Zaghetto, C. and Purim, A. and Oliveira, W. and Ribeiro, J. R. and Nolla, H. and Santos, F. and Chang, M. and Vareto, R. H.}, title = {Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization PatchGAN}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1221-1229} }
PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosi_2026_CVPR, author = {Rosi, Gabriele and Cermelli, Fabio and Masone, Carlo and Caputo, Barbara}, title = {PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7326-7336} }
GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ray_2026_CVPR, author = {Ray, Cong and Deng, Xiangwen and Huang, Feice and Wu, ZhengXian and Jiang, Shen'ao and Jiao, Peng and Liu, Zhifang and Wang, Haoqian}, title = {GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9195-9204} }
Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Yeongsu and Choi, Seo-Yeon and Lee, Kyungsu}, title = {Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8857-8867} }
Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Takeuchi_2026_CVPR, author = {Takeuchi, Yuichiro and Imoto, Yusuke and Kato, Shunya}, title = {Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6880-6889} }
Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jie and Su, Yiyang and Liu, Xiaoming}, title = {Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9401-9411} }
Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation-
[pdf]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Jiaqi and Guo, Hongcheng and Liu, Jiaheng and Zhou, Zhibo and Yang, Jian and Huang, Feiran}, title = {Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9867-9877} }
Trajectory-Diversity-Driven Robust Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiangyang and Wan, Cong and Dong, SongLin and Ding, Chenhao and Wang, Qiang and Ma, Zhiheng and Gong, Yihong}, title = {Trajectory-Diversity-Driven Robust Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9143-9154} }
Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yunpeng and Hou, Xingzhong and Wu, Jie and Liu, Boxiao and Zhang, Yi and Song, Guanglu and Liu, Yu and Tian, Changyao and Luo, Gen and You, Haihang}, title = {Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4678-4687} }
Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Mukkamala_2026_CVPR, author = {Mukkamala, Hanvitha Saraswathi and Pujari, Arun K}, title = {Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1680-1689} }
Efficient3D: A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yuhui and Yu, Siyue and Yang, Yuxing and Cheng, Guangliang and Xiao, Jimin}, title = {Efficient3D: A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8941-8951} }
GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minsol and Ali, Usman}, title = {GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {33-42} }
Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Wanqi and Guo, Jingcai and Cai, Yuxiang and Chen, Zhi}, title = {Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7747-7757} }
Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao}, title = {Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7717-7727} }
NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2026_CVPR, author = {Huynh, Quang Dang and Yin, Xuefei and Busch, Andrew and Espinosa, Hugo G. and Liew, Alan Wee-Chung and Worsey, Matthew T. O. and Zhu, Yanming}, title = {NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8368-8377} }
DynaMind: Reconstructing Dynamic Visual Scenes from EEG by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Junxiang and Lin, Junming and Zhou, Jie and Xiong, Wei and Li, Jiangtong and Li, Jie and Zhuang, Jie and Ji, Hongfei}, title = {DynaMind: Reconstructing Dynamic Visual Scenes from EEG by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5378-5387} }
AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yue and Wang, Yingyao and Bu, Pi and Xing, Jingxuan and Jiang, Wei and Zhu, Zekun and Ma, Junpeng and Zhou, Sashuai and Lu, Tong and Song, Jun and Cheng, Yu and Jiang, Yuning and Zheng, Bo}, title = {AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1903-1912} }
TP-Seg: Task-Prototype Framework for Unified Medical Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiawei and Zhou, Qiangqiang and Zhu, Dandan and Chen, Yong and Yi, Yugen and Zhao, Xiaoqi}, title = {TP-Seg: Task-Prototype Framework for Unified Medical Lesion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5452-5462} }
C^2T: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuyang and Zhao, Kaiyan and Wang, Yiming and Yang, Ming and Rao, Bin and Li, Zhenning}, title = {{C$^2$T}: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1128-1137} }
OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Junhui and Cai, Zhizhen and Wang, Puze and Ke, Guanzhou and Yang, Jianhua and Zhang, Man and Zhang, Qiang and He, Shengfeng}, title = {OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6717-6726} }
Generative Event Pretraining with Foundation Model Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Jianwen and Xing, Jiaxu and Messikommer, Nico and Scaramuzza, Davide}, title = {Generative Event Pretraining with Foundation Model Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3189-3199} }
RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Suhang and Hu, Wei and Su, Yuhang and Zhang, Fan}, title = {RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9878-9887} }
Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Hongye and Krawczyk, Bartosz}, title = {Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7852-7861} }
DARTS: Distance-Aware Robust Training for Selective Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Sayyed_2026_CVPR, author = {Sayyed, A. Q. M. Sazzad and Bastian, Nathaniel D. and Restuccia, Francesco}, title = {DARTS: Distance-Aware Robust Training for Selective Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8806-8815} }
Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiming and Wei, Yujie and Feng, Lei and Su, Xiu and Xia, Xiaobo and Guan, Weili and Xie, Zeke and Yang, Shuo}, title = {Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9597-9607} }
D^2-STX: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaoai and Wang, Hang and Liu, Yan and Hu, Huan and Yu, Bruce X. B.}, title = {{D$^2$-STX}: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8205-8214} }
Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Linxi and Sun, Lisong C. and Neall, Ashley and Wu, Tong and Cai, Shengqu and Wetzstein, Gordon}, title = {Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3998-4008} }
Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goral_2026_CVPR, author = {Goral, Gracjan and Ziarko, Alicja and Milos, Piotr and Nauman, Michal and Wolczyk, Maciej and Kosinski, Michal}, title = {Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1721-1730} }
Parallel In-context Learning for Large Vision Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yamaguchi_2026_CVPR, author = {Yamaguchi, Shin'ya and Chijiwa, Daiki and Sakao, Tamao and Hasegawa, Taku}, title = {Parallel In-context Learning for Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5796-5806} }
Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Kuo and Lu, Jianglin and Fu, Yun}, title = {Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5704-5713} }
BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Zhien and Tang, Zhaohui and Zhang, Hu and Pan, Mingjun and Luo, Jin and Xie, Yongfang}, title = {BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1-11} }
mmDiff: A Noise-Robust Differentiable Ray-Tracing Framework for mmWave Scene Calibration and Channel Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Haofan and Cao, Yadi and Yi, Wanghao and Abari, Omid}, title = {mmDiff: A Noise-Robust Differentiable Ray-Tracing Framework for mmWave Scene Calibration and Channel Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6435-6444} }
MARIO: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yiquan and Yeon, Taeyoung and Gao, Chenfeng and Xu, Vasco and Liu, Xuanyou and Ahuja, Karan}, title = {MARIO: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3460-3469} }
A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yajun}, title = {A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5255-5264} }
FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Tianhao and Jiang, Linlian and Zuo, Xinxin and Wang, Yang and Popa, Tiberiu}, title = {FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {202-212} }
Efficient Unlearning through Maximizing Relearning Convergence Delay-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Khoa and Woo, Simon S.}, title = {Efficient Unlearning through Maximizing Relearning Convergence Delay}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7968-7977} }
Deep Parameter Interpolation for Scalar Conditioning-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Chicago Y. and McCann, Michael T. and Garcia-Cardona, Cristina and Wohlberg, Brendt and Kamilov, Ulugbek S.}, title = {Deep Parameter Interpolation for Scalar Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3884-3892} }
Zero-Shot Textual Explanations via Translating Decision-Critical Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamauchi_2026_CVPR, author = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko}, title = {Zero-Shot Textual Explanations via Translating Decision-Critical Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3282-3292} }
Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification-
[pdf]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Lulu and Qin, Jiaxiang and Yan, Ruiheng and Pan, Ning and Liu, Haihua and Chen, Xinxin}, title = {Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2564-2574} }
HumanOrbit: 3D Human Reconstruction as 360deg Orbit Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Suzuki_2026_CVPR, author = {Suzuki, Keito and Chen, Kunyao and Wang, Lei and Du, Bang and Li, Runfa Blark and Liu, Peng and Bi, Ning and Nguyen, Truong}, title = {HumanOrbit: 3D Human Reconstruction as 360{\textdegree} Orbit Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {624-634} }
Channel Correlation Loss for Binary Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Xindi and Zhang, Wei and Yu, Hai and Zhu, Zhiliang}, title = {Channel Correlation Loss for Binary Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2575-2584} }
Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation-
[pdf]
[bibtex]@InProceedings{Somers_2026_CVPR, author = {Somers, Jacey and Zale, Harrison and Mason, Janine and Walker, Tina and Quinn, Eddie and Lewis, Felix and Wright, Gavin and Young, Yvonne and Sullivan, Charles and Carter, Wayne and Foster, Julian}, title = {Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8534-8543} }
Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuefei and Liu, Jiang and Lin, Xiaodong and Tang, Ruixiang}, title = {Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9165-9174} }
JACoP: Joint Alignment for Compliant Multi-Agent Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qingze Tony and Mrdovic, Alen and Li, Danrui and Schwartz, Mathew and Yoon, Sejong and Kapadia, Mubbasir}, title = {JACoP: Joint Alignment for Compliant Multi-Agent Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {910-919} }
Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with SAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Jingxuan and Zhang, Ziqi and Zheng, Shaoming and Li, Shuang and Patel, Uday Bharat and Fitzhugh, Alexander Harry and Lung, Phillip and Kiberu, Yusuf and Jathanna, Nikesh and Jamil-Copley, Shahnaz and Kainz, Bernhard and Qin, Chen}, title = {Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with SAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5224-5234} }
Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Songlong and Wang, Weijie and Zhao, Zhengyu and Gu, Jindong and Torr, Philip and Sebe, Nicu}, title = {Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {737-747} }
LLM Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Chen and Yi, Shengzhou and Xiao, Ling and Yamasaki, Toshihiko}, title = {LLM Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5725-5734} }
Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Hu and Long, Chengjiang and Zhang, Jiqing and Jiang, Chuanlu and Ge, Huilin and Yin, Erwei and Yin, Baocai and Yang, Xin}, title = {Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1335-1345} }
Revisiting Articulated Parts Perception in Robot Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiaoqian and Guo, Yejie and Chen, Xiaoyang and Yang, Lixin and Lu, Cewu and Li, Yong-Lu}, title = {Revisiting Articulated Parts Perception in Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1368-1377} }
Seeing Helps Reasoning in Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2026_CVPR, author = {Gan, Yulu and Zhao, Kaiya Ivy and Poggio, Tomaso and Isola, Phillip}, title = {Seeing Helps Reasoning in Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7080-7090} }
FlowSteer: Conditioning Flow Field for Consistent Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wickremasinghe_2026_CVPR, author = {Wickremasinghe, Tharindu and Qi, Chenyang and Weligampola, Harshana and Tu, Zhengzhong and Chan, Stanley H.}, title = {FlowSteer: Conditioning Flow Field for Consistent Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5106-5116} }
DraCo: Draft as CoT for Text-to-Image Preview and Rare Concept Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Dongzhi and Zhang, Renrui and Li, Haodong and Zong, Zhuofan and Guo, Ziyu and He, Jun and Guo, Claire and Ye, Junyan and Fang, Rongyao and Li, Weijia and Liu, Rui and Li, Hongsheng}, title = {DraCo: Draft as CoT for Text-to-Image Preview and Rare Concept Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5903-5914} }
Vision Language Models are Confused Tourists-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Irawan_2026_CVPR, author = {Irawan, Patrick Amadeus and Hanif, Ikhlasul Akmal and Al Kautsar, Muhammad Dehan and Winata, Genta Indra and Koto, Fajri and Aji, Alham Fikri}, title = {Vision Language Models are Confused Tourists}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1763-1773} }
TAPNext++: What's Next for Tracking Any Point (TAP)?-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Sebastian and Zholus, Artem and Sundermeyer, Martin and Doersch, Carl and Goroshin, Ross and Tan, David Joseph and Chandar, Sarath and Triebel, Rudolph and Tombari, Federico}, title = {TAPNext++: What's Next for Tracking Any Point (TAP)?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8429-8438} }
RelativeFlow: Taming Medical Image Denoising Learning with Noisy Reference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuxin and Dong, Yiqing and Yu, Wenxue and Wu, Zhan and Ge, Rongjun and Chen, Yang and He, Yuting}, title = {RelativeFlow: Taming Medical Image Denoising Learning with Noisy Reference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5673-5682} }
HypHOI: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yixin and Liu, Yu and Wang, Weimin and Guo, Yanming and Jia, Qi}, title = {HypHOI: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6517-6527} }
Drive-Cascade: Autoregressive Occupancy to LiDAR and Video Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Shuangming and Huang, Yuehao and Yi, Yao and Xie, Yijia and Wang, Jingke and Wang, Ruoyu and Lv, Jiajun and Xu, Guanglin and Ye, AiXue and Liu, Bingbing and Cheng, Siyuan and Zhang, Hongbo and Ma, Yukai and Liu, Yong}, title = {Drive-Cascade: Autoregressive Occupancy to LiDAR and Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4552-4561} }
From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ruan_2026_CVPR, author = {Ruan, Cihan and Zhou, Lebin and Zhao, Bingqing and Han, Rongduo and Yuan, Qiming and Zhu, Chenchen and Han, Linyi and Yang, Liang and Wang, Wei and Jiang, Wei and Ling, Nam}, title = {From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8544-8553} }
A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing-
[pdf]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Shiwei and Zhang, Lan and Wang, Zhenlin and Yuan, Xiaoyong}, title = {A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8039-8048} }
ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Wenyang and Hu, Zhanxuan and Zhang, Yaping and Ning, Hailong and Tai, Yonghang}, title = {ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7408-7418} }
Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tianle and Chakka, Chaitanya and Akula, Arjun Reddy and Thomas, Xavier and Ghadiyaram, Deepti}, title = {Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2142-2151} }
Discovering Attention Head Interactions in Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Zhenyu and Jia, Yuheng and You, Wei and Chen, Hao}, title = {Discovering Attention Head Interactions in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3332-3342} }
Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yolo Yunlong and Shimada, Daiki and Hua, Hang and Huang, Chao and Bi, Jing and Feris, Rogerio and Xu, Chenliang}, title = {Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8314-8325} }
Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yushuo and Duan, Huiyu and Zhang, Zicheng and Liu, Xiaohong and Min, Xiongkuo}, title = {Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7208-7219} }
Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Hongrui and Jiang, Chaoya and Zhang, Shikun and Ye, Wei}, title = {Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9574-9585} }
MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Jian and Jiao, Yifan and Shao, Xi and Bao, Bing-Kun}, title = {MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7758-7767} }
SwiftPie: Lightning-fast Subject-driven Image Personalization via One step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duong_2026_CVPR, author = {Duong, Huy and Nguyen, Trong-Tung and Pham, Cuong and Tran, Anh and Nguyen, Khoi and Hoai, Minh}, title = {SwiftPie: Lightning-fast Subject-driven Image Personalization via One step Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4708-4718} }
When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jianxin and Zhu, Chunzheng and Kneuertz, Peter J and Bai, Yunfei and Xue, Yuan}, title = {When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5556-5568} }
MVSSM: Motion-aware Visual State Space Model for Efficient Video Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Chen and Wu, Tao and Liu, Wei and Wu, Xi and Fu, Ying}, title = {MVSSM: Motion-aware Visual State Space Model for Efficient Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4855-4865} }
Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Biscarrat_2026_CVPR, author = {Biscarrat, Camille and Gharbi, Micha{\"e}l and Goel, Rahul and Ragan-Kelley, Jonathan and Durand, Fr{\'e}do and Li, Tzu-Mao}, title = {Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1283-1293} }
From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Haoming and Liu, Jinnuo and Li, Yanhao and Bai, Liuyang and Ji, Yunkai and Guo, Yuanhe and Wan, Shenji and Wen, Hongyi}, title = {From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2649-2658} }
HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patro_2026_CVPR, author = {Patro, Badri N and Agneeswaran, Vijay S}, title = {HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2408-2418} }
Learning to Select Visual In-Context Demonstrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Eugene and Lin, Yu-Chi and Diao, Jiajie}, title = {Learning to Select Visual In-Context Demonstrations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9455-9465} }
FSMC-Pose: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Fangjing and Wang, Zhihai and Ding, Xinxin and Liu, Haiyang and Gao, Ronghua and Wang, Rong and Zhu, Yao and Jin, Ming}, title = {FSMC-Pose: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3620-3629} }
Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Xiaobin and Diao, Changyu and Li, Min and Yu, Ruohan and Xu, Duanqing}, title = {Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {223-232} }
ZODS-RS -- Zero-Training Oriented Detection & Segmentation for Remote Sensing-
[pdf]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zuan and Gao, Tianhan and Zhao, Langxu}, title = {ZODS-RS -- Zero-Training Oriented Detection \& Segmentation for Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6322-6330} }
Concise Geometric Description as a Bridge: Unleashing the Potential of LLM for Plane Geometric Problem Solving-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jingyun and Li, Dian and Wang, Xiaohan and Liu, Gang and Yan, Jiahong and Kang, Guoliang}, title = {Concise Geometric Description as a Bridge: Unleashing the Potential of LLM for Plane Geometric Problem Solving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5958-5967} }
Rich Feature Learning via Diversification-
[pdf]
[supp]
[bibtex]@InProceedings{Leng_2026_CVPR, author = {Leng, Xi and Chen, Yongqiang and Tang, Xiaoying and Bian, Yatao}, title = {Rich Feature Learning via Diversification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2462-2472} }
MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziyi and Ma, Xianping and Wang, Ziyao and Zhang, Hongyang and Pun, Man On}, title = {MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7251-7261} }
Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Qiji and Yang, Chuanguang and An, Zhulin and Huang, Libo and Zhao, Erhu and Li, Yuqi and Xu, Yongjun}, title = {Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9531-9541} }
SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yonghan and Huang, Tsung-Wei and Gehlot, Shiv and Choi, Jaehoon and Su, Guan-Ming and Manocha, Dinesh}, title = {SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {77-87} }
When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Hossain_2026_CVPR, author = {Hossain, Md Zarif and Fime, Awal Ahmed and Imteaj, Ahmed}, title = {When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {758-767} }
PTAD: Pose and Texture Agnostic Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuo_2026_CVPR, author = {Zhuo, Wei and Xiang, Jianen and Liu, Miaomiao and Lu, Huajun}, title = {PTAD: Pose and Texture Agnostic Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6779-6788} }
Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yizhao and Zhu, Hongyuan and Liu, Caiyun and Wang, Tianfu and Chen, Keyu and Xu, Sicheng and Yang, Jiaolong and Yuan, Nicholas Jing and Zhang, Qi}, title = {Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {635-646} }
Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation-
[pdf]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Junhao and Zhang, Chaoyang and Zhang, Yecheng and Zhou, Chengyang and Wang, Zhichang and Liu, Bochun and Yin, Dongshuo}, title = {Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8607-8617} }
BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dang_2026_CVPR, author = {Dang, Ba Luan and Truong, Vu Tuan and Le, Long Bao}, title = {BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {726-736} }
GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Pengyu and Dai, Yuqin and Yin, Jun and Zhong, Jing and Han, Ziyang and Shi, Chaoyang and Jin, ZhanXiang and Jiang, Maowei and Han, Yuxing and Lu, Shuai}, title = {GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8596-8606} }
Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kaiwen and Zheng, Kaili and Shi, Yiming and Guo, Chenyi and Wu, Ji}, title = {Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3532-3542} }
Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiaoxi and Sun, Bo and An, Yisheng and Liu, Ganchao}, title = {Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7188-7197} }
Meta-CDMTransNet: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification-
[pdf]
[bibtex]@InProceedings{Mohanta_2026_CVPR, author = {Mohanta, Anindita and Roy, Sourav Dey and Saha, Priya and Nath, Niharika and Bhowmik, Mrinal Kanti}, title = {Meta-CDMTransNet: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5525-5534} }
PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Picard_2026_CVPR, author = {Picard, David and Dufour, Nicolas and Degeorge, Lucas and Ghosh, Arijit and Allegro, Davide and Ravaud, Tom and Perron, Yohann and Sautier, Corentin and Baltaci, Zeynep Sonat and Meng, Fei and Kalleli, Syrine and L{\'o}pez-Rauhut, Marta and Loiseau, Thibaut and Albouy, S{\'e}gol{\`e}ne and Baena, Raphael and Vincent, Elliot and Landrieu, Loic}, title = {PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2544-2553} }
D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenlun and Zhong, Yunshan and Ding, Zihao and Li, Xinyu and Yoshioka, Kentaro}, title = {D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2978-2987} }
Label-Agnostic Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2026_CVPR, author = {Bian, Yuwei and Wang, Shidong and Li, Chunming and Zhang, Haofeng}, title = {Label-Agnostic Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7573-7582} }
CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hangyu and Cao, Bofeng and Liang, Zhaohui and Li, Wuzhen and Oh, Juyoung and Chen, Yuxuan and Liang, Shixiao and Zhou, Hang and Ma, Chengyuan and Liu, Jiaxi and Li, Zheng and Zhang, Peng and Long, Keke and Liu, Maolin and Jiang, Jackson and Yu, Chunlei and Liu, Shengxiang and Yu, Hongkai and Li, Xiaopeng}, title = {CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2294-2303} }
Autoregressive Universal Video Segmentation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heo_2026_CVPR, author = {Heo, Miran and Hwang, Sukjun and Chen, Min-Hung and Wang, Yu-Chiang Frank and Gu, Albert and Kim, Seon Joo and Hachiuma, Ryo}, title = {Autoregressive Universal Video Segmentation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7429-7438} }
RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Liu and Wang, Xiaofeng and Zhao, Guosheng and Li, Keyu and Qin, Wenkang and Zhu, Jiagang and Qiu, Jiaxiong and Huang, Guan and Su, Zhizhong}, title = {RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1410-1420} }
LiteEmbed: Adapting CLIP to Rare Classes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agarwal_2026_CVPR, author = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet}, title = {LiteEmbed: Adapting CLIP to Rare Classes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6133-6142} }
HAFM: A Post-Fusion Gating Module for Haze-Aware RGB-Thermal Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Saeteros_2026_CVPR, author = {Saeteros, Juan M. and Ar{\'e}valo, Nick J. and Vintimilla, Boris X.}, title = {HAFM: A Post-Fusion Gating Module for Haze-Aware RGB-Thermal Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6197-6207} }
Inf-Dehaze: Beyond GPU Memory Constraints for Ultra-High-Resolution Image Dehazing-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Xinyu and Chen, Jiuchen and Xu, Qizhi}, title = {Inf-Dehaze: Beyond GPU Memory Constraints for Ultra-High-Resolution Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5086-5095} }
SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360° Video Saliency Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kao and Song, Tao and Hu, Zhihua and Li, Ming and Ding, Xin}, title = {SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360{$^\circ$} Video Saliency Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2596-2605} }
Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chaki_2026_CVPR, author = {Chaki, Sayan Kumar and Fournel, Thierry and Emonet, R{\'e}mi}, title = {Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7008-7017} }
UniLat3D: Geometry-Appearance Unified Latents for Single-Stage 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Guanjun and Fang, Jiemin and Yang, Chen and Li, Sikuang and Yi, Taoran and Lu, Jia and Zhou, Zanwei and Cen, Jiazhong and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Wang, Xinggang and Tian, Qi}, title = {UniLat3D: Geometry-Appearance Unified Latents for Single-Stage 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4366-4378} }
VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2026_CVPR, author = {Pang, Bo and Xu, Chenxi and Ren, Jierui and Wang, Guoping and Li, Sheng}, title = {VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2028-2037} }
RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies-
[pdf]
[supp]
[bibtex]@InProceedings{Garcia-Cobo_2026_CVPR, author = {Garcia-Cobo, Guillermo and Igl, Maximilian and Karkus, Peter and Zhang, Zhejun and Watson, Michael and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco}, title = {RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1000-1009} }
Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xi and Zhu, Hanwei and Wang, Jiamang and Wu, Xiaolin and Lin, Weisi}, title = {Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9305-9315} }
IM-Animation: An Implicit Motion Representation for Identity-Decoupled Character Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zhufeng and Gao, Xuan and Liu, Feng-Lin and Zhang, Haoxian and Fang, Zhixue and Lai, Yu-Kun and Liu, Xiaoqiang and Wan, Pengfei and Gao, Lin}, title = {IM-Animation: An Implicit Motion Representation for Identity-Decoupled Character Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4635-4646} }
Is Your Text-to-Image Model Robust to Caption Noise?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Weichen and Yang, Ziyan and Lin, Shanchuan and Zhao, Qi and Wang, Jianyi and Gui, Liangke and Fredrikson, Matt and Jiang, Lu}, title = {Is Your Text-to-Image Model Robust to Caption Noise?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3789-3798} }
Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinzhuo and Juvekar, Adheesh and Zhang, Jiaxun and Liu, Xingyou and Wahed, Muntasir and Nguyen, Kiet A. and Shen, Yifan and Yu, Tianjiao and Lourentzou, Ismini}, title = {Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7450-7460} }
SyntheticManga: Training-Free Manga Generation with Phased Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Xuelei and Tang, Chi-Keung and Tai, Yu-Wing}, title = {SyntheticManga: Training-Free Manga Generation with Phased Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4410-4418} }
FALCON: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Venugopal_2026_CVPR, author = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun}, title = {FALCON: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5024-5033} }
Self-Evolving 3D Scene Generation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Kaizhi and Fan, Yue and Gu, Jing and Xu, Zishuo and He, Xuehai and Wang, Xin Eric}, title = {Self-Evolving 3D Scene Generation from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {579-590} }
SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jikai and Gui, Xingtai and Gong, Jiahao and Tan, Feiyang and Han, Wencheng and Xu, Cheng-Zhong and Shen, Jianbing}, title = {SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1039-1049} }
Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Brahma_2026_CVPR, author = {Brahma, Debarshi and Biswas, Soma}, title = {Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {656-665} }
Adversarial Concept Distillation for One-Step Diffusion Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yixiong and Wu, Tao and Li, Senmao and Yang, Shiqi and Wang, Yaxing and van de Weijer, Joost and Wang, Kai}, title = {Adversarial Concept Distillation for One-Step Diffusion Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4321-4333} }
Bootstrapping Sign Language Annotations with Sign Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lea_2026_CVPR, author = {Lea, Colin and Baltatzis, Vasileios and Gillis, Connor and Kushalnagar, Raja and Quandt, Lorna and Findlater, Leah}, title = {Bootstrapping Sign Language Annotations with Sign Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3630-3640} }
RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kilinc_2026_CVPR, author = {Kilinc, Ozsel and Tarhan, Cem}, title = {RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1159-1169} }
MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Changho and Kim, Minho and Kim, Jinkyu}, title = {MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2659-2668} }
MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2026_CVPR, author = {Leng, Sicong and Wang, Jing and Li, Jiaxi and Zhang, Hao and Hu, Zhiqiang and Zhang, Boqiang and Jiang, Yuming and Zhang, Hang and Li, Xin and Zhao, Deli and Lu, Wei and Rong, Yu and Sun, Aixin and Lu, Shijian}, title = {MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9075-9087} }
Seeing Through Fog: Towards Fog-Invariant Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Enqi and Pan, Liyuan and Gao, Zhi and Li, Lingzhi and Li, Qing}, title = {Seeing Through Fog: Towards Fog-Invariant Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6966-6975} }
Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Cheng-Yen and Huang, Hsiang-Wei and Chen, Kuang-Ming and Li, Kunjun and Hwang, Jenq-Neng}, title = {Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8358-8367} }
From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yi-Xiang and Wang, Yu-Shuen}, title = {From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9520-9530} }
RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yangfan and Zhang, Hanwei and Huang, Ke and Wang, Qiufeng and Shao, Zhenzhou and Wu, Dengyu}, title = {RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1492-1502} }
ConfDiff: Confidence-Guided Representation Diffusion for Video Moment Retrieval-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Haiming and Wang, Tai}, title = {ConfDiff: Confidence-Guided Representation Diffusion for Video Moment Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8164-8174} }
PolyReal: A Benchmark for Real-World Polymer Science Workflows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Wanhao and Wang, Weida and Xie, Jiaqing and Yang, Suorong and Wang, Jue and Chen, Benteng and Mei, Guangtao and Yang, Zonglin and Zhang, Shufei and Mo, Yuchun and Cheng, Lang and Zeng, Jin and Li, Houqiang and Ouyang, Wanli and Li, Yuqiang}, title = {PolyReal: A Benchmark for Real-World Polymer Science Workflows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1954-1964} }
Continual Alignment for SAM: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiayi and Dai, Wei and Wang, Haoyu and Yang, Sihan and Bi, Haixia and Sun, Jian}, title = {Continual Alignment for SAM: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7520-7529} }
GATE: Gaussian-Attentive Transformer for Uncertainty-Aware Age Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Chaewon and Heo, JunHyeok and Kim, Chang-Su}, title = {GATE: Gaussian-Attentive Transformer for Uncertainty-Aware Age Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8736-8745} }
Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Linsi and Shen, Gang and Lv, Xuefei and Wu, Chenglong and Pei, Yuru}, title = {Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2669-2679} }
On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Ali_2026_CVPR, author = {Ali, Ziad Tariq Muhammad and Azad, Raja Muhammad Atif and Azad, Muhammad Ajmal and Rice, Iain and Daraz, Umar and Imran, Ali Shariq and Holyhead, James}, title = {On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {809-818} }
SafetyBPO: Bidirectional Preference Optimization for Safe Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, You and Zhu, Beier and Zhang, Chi}, title = {SafetyBPO: Bidirectional Preference Optimization for Safe Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4759-4768} }
FastMMoE: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Guoyang and Ding, Yifeng and Li, Fengfa and Ren, Lei and Chen, Wei and Feng, Fangxiang and Wang, Xiaojie}, title = {FastMMoE: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5915-5924} }
Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hsu_2026_CVPR, author = {Hsu, YuChe and Wang, AnJui and Ni, TsaiChing and Yang, YuanFu}, title = {Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8705-8714} }
HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shurui and Chen, Weide and Wu, Ancong}, title = {HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {538-546} }
Vision-R1: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Yufei and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Zheng, Shurong and Tang, Ming and Wang, Jinqiao}, title = {Vision-R1: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5807-5817} }
WildAni4D: Towards 4D Animal Mesh Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Gyeongsu and Hu, Hezhen and Soon, Donghyeon and Kang, Changwoo and Joo, Kyungdon}, title = {WildAni4D: Towards 4D Animal Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {160-169} }
MARS-RL: Enhancing Multi-Agent RAG Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhongyu and Liu, Pengbo}, title = {MARS-RL: Enhancing Multi-Agent RAG Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9674-9683} }
PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yixin and Wang, Yan and Shao, Wenrui and Xie, Zhaoheng}, title = {PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2534-2543} }
LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rui_2026_CVPR, author = {Rui, Yicheng and Duan, Xiao-Wei and Deng, Licai and Yang, Fan and Dang, Zhengming and Du, Zhengjun and Peng, Junhao and Chu, Wenhao and Mahmut, Umut and Li, Kexin and Wu, Yiyun and Feng, Fabo}, title = {LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1774-1785} }
SCOPE: Scene-Contextualized Incremental Few-Shot 3D Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thengane_2026_CVPR, author = {Thengane, Vishal and An, Zhaochong and Huang, Tianjin and Phung, Son Lam and Bouzerdoum, Abdesselam and Yin, Lu and Zhao, Na and Zhu, Xiatian}, title = {SCOPE: Scene-Contextualized Incremental Few-Shot 3D Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7368-7377} }
From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dinh_2026_CVPR, author = {Dinh, My H. and Sant, Aditya and Malhotra, Akshay and Patani, Keya and Hamidi-Rad, Shahab}, title = {From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7070-7079} }
MedSAD-CLIP: Supervised CLIP with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Thuy Truong and Do, Minh Kha and Duy, Phuc Nguyen and Lee, Min Hun}, title = {MedSAD-CLIP: Supervised CLIP with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5168-5178} }
Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junyu and Harun, Md Yousuf and Kanan, Christopher}, title = {Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2284-2293} }
UI-AGILE: Advancing GUI Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shuquan and Wu, Yuhang and Ma, Jia and Ding, Yifan and Song, Zihan and Chen, Bingqi and Zheng, Xiawu and Li, Hui and Ji, Rongrong},
  title     = {{UI-AGILE}: Advancing {GUI} Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8575--8584},
}
SCP: Spatial Causal Prediction in Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yanguang and Yang, Jie and Wu, Shengqiong and Hu, Shutong and Qiu, Hongbo and Wang, Yu and Zhang, Guijia and Ze, Tan Kai and Fei, Hao and Lin, Chia-Wen and Lee, Mong-Li and Hsu, Wynne},
  title     = {{SCP}: Spatial Causal Prediction in Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7165--7175},
}
RefDrone: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhichao and Liu, Yepeng and Su, Zhiling and Zhu, Huachao and Gu, Yuliang and Zou, Yuda and Liu, Zelong and Xia, Gui-Song and Du, Bo and Xu, Yongchao},
  title     = {{RefDrone}: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1752--1762},
}
CineMatte: Background Matting for Virtual Production and Beyond-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Yuanjian and Zhang, Chen and Chen, Fasheng and Cao, Jiangbo},
  title     = {{CineMatte}: Background Matting for Virtual Production and Beyond},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8725--8735},
}
ProDiG: Progressive Diffusion-Guided Gaussian Splatting for Aerial to Ground Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mitra_2026_CVPR,
  author    = {Mitra, Sirshapan and Rawat, Yogesh S.},
  title     = {{ProDiG}: Progressive Diffusion-Guided {Gaussian} Splatting for Aerial to Ground Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {22--32},
}
USV: Unified Sparsification for Accelerating Video Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xinjian and Wang, Hongmei and Zhou, Yuan and Lu, Qinglin},
  title     = {{USV}: Unified Sparsification for Accelerating Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4180--4189},
}
DeepDP-TGMM: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Tepakbong_2026_CVPR,
  author    = {Tepakbong, Cyril Kana and Bouchard, K{\'e}vin and Maitre, Julien},
  title     = {{DeepDP-TGMM}: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7483--7492},
}
CtrlISP: Rescuing Low-Light RAW Images via Controllable Neural ISP-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Li, Yachun and Du, Hang and Yang, Shicai and Xie, Di and Zhu, Jiang and Yang, Yang},
  title     = {{CtrlISP}: Rescuing Low-Light {RAW} Images via Controllable Neural {ISP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4888--4897},
}
Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Feng and Gou, Chenhui and He, Yefei and Yang, Yang and Zhuang, Bohan and Wu, Qi},
  title     = {Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3050--3059},
}
Complexity of Linear Regions in Self-supervised Deep ReLU Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Muthivhi_2026_CVPR,
  author    = {Muthivhi, Mufhumudzi and van Zyl, Terence L.},
  title     = {Complexity of Linear Regions in Self-supervised Deep {ReLU} Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6911--6920},
}
A Diagnostic Study of Region-Based Representations in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Ji and Cao, Shengcao and Wang, Yu-Xiong},
  title     = {A Diagnostic Study of Region-Based Representations in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5978--5988},
}
Towards Noise-Robust Medical Segmentation via Chebyshev-Attention-Based Asymmetric UNet-
[pdf]
[supp]
[bibtex]@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Yue and Zheng, Ziyang and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {Towards Noise-Robust Medical Segmentation via {Chebyshev}-Attention-Based Asymmetric {UNet}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5235--5244},
}
Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zhiqiu and Mu, Furong and Li, Qi and Zhang, Shanshan and Gui, Jie and Wang, Chunpeng and Liu, Yunan},
  title     = {Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1650--1659},
}
Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Peicheng and Fang, Shancheng and Jin, Chenhui and Pu, Bowei and Xie, Hongtao},
  title     = {Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6799--6808},
}
A Simple Framework for Visual Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Johnson_2026_CVPR,
  author    = {Johnson, Faith and Cao, Bryan Bo and Jain, Shubham and Ashok, Ashwin and Dana, Kristin},
  title     = {A Simple Framework for Visual Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3167--3177},
}
Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{You_2026_CVPR,
  author    = {You, Yiwei and Chen, Zan and Wang, Bo and Zhou, Xiaofei},
  title     = {Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2324--2334},
}
DARN: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Dhenenjay and Sawai, Rohan},
  title     = {{DARN}: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7625--7633},
}
HeartcareGPT: A Unified Multimodal ECG Suite for Dual Signal-Image Modeling and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yihan and Li, Sijing and Wang, Zhuonan and Lin, Tianwei and Yang, Chenglin and Zhong, Yu and Yan, Wenjie and Zhang, Wenqiao and Guo, Xiaogang and Xiao, Jun and Zhuang, Yueting and Ooi, Beng Chin},
  title     = {{HeartcareGPT}: A Unified Multimodal {ECG} Suite for Dual Signal-Image Modeling and Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6228--6238},
}
Rethinking VLMs for Image Forgery Detection and Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Shaofeng and Cui, Jiequan and Hong, Richang},
  title     = {Rethinking {VLMs} for Image Forgery Detection and Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5828--5837},
}
Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Zhilong and Zhang, Hang and Li, Yanmin and Liu, Lihua and Wu, Jibing and Wang, Mao},
  title     = {Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7810--7819},
}
MReactor: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Jiachen and He, Jiajun and Shen, Shuai and Wang, Lin and Phan, Huy and Reiss, Joshua and Haijun, Lin and Schuller, Bjoern and Fu, Zeyu and Song, Siyang},
  title     = {{MReactor}: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3354--3363},
}
UniVerse3D: Emerging Properties of Unified Multimodal Models in 3D Understanding and Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Junliang and Huang, Zehuan and Qu, Yansong and Wang, Chunshi and Yang, Yunhan and Li, Yang and Luo, Yawei and Chen, Zhuo and Lu, Sheng and Zhu, Jun and Guo, Chunchao},
  title     = {{UniVerse3D}: Emerging Properties of Unified Multimodal Models in {3D} Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {613--623},
}
Region-Aware Hierarchical Sub-Feature Alignment for Robust EEG-Based Visual Decoding-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yanan and Xiang, Ziwei and Wu, Jiamin and Guo, Jinyang and Zhang, Hongyuan and Song, Chunfeng and Fang, Hongjian and Guo, Yufei and Liu, Xianglong},
  title     = {Region-Aware Hierarchical Sub-Feature Alignment for Robust {EEG}-Based Visual Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6667--6676},
}
STORM: End-to-End Referring Multi-Object Tracking in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zijia and Yi, Jingru and Wang, Jue and Chen, Yuxiao and Chen, Junwen and Li, Xinyu and Modolo, Davide},
  title     = {{STORM}: End-to-End Referring Multi-Object Tracking in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8347--8357},
}
Towards Calibrated Gradient-based Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Linxiao and Yang, Mianzimei and Zhou, Zhipeng and Xie, Hong and Lian, Defu and Yang, Menglin},
  title     = {Towards Calibrated Gradient-based Multi-Task Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5127--5136},
}
NAKUL-Med: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patro_2026_CVPR,
  author    = {Patro, Badri N. and Agneeswaran, Vijay S.},
  title     = {{NAKUL-Med}: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5399--5408},
}
Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Thinh and Khiem, Le Huy and Tran, Van-Tuan and Doan, Khoa D. and Chawla, Nitesh V. and Wong, Kok-Seng},
  title     = {Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7728--7736},
}
Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Li, Zhankai and Yu, Yongqiang and Yan, Xuehu and Lu, Yuliang},
  title     = {Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7935--7944},
}
UnrealSpace: Analyzing Spatial Understanding and Reasoning in Controllable Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Wufei and Cen, Sky and Shen, Jianzhi and Lee, Rex and Begiristain, Le{\'o}n and Zhuang, Yan and Peng, Jiawei and Yu, Zhifei and Song, Tianao and Qi, Xinyuan and Shu, Tianmin and Kortylewski, Adam and Yuille, Alan},
  title     = {{UnrealSpace}: Analyzing Spatial Understanding and Reasoning in Controllable Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9716--9725},
}
C3-Diff: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaofei and Price, Stephen J. and Li, Chao},
  title     = {{C3-Diff}: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5463--5473},
}
Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday QR Codes-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashish and S, Aarthi and Agarwal, Akshay},
  title     = {Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday {QR} Codes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {857--866},
}
Beyond 3D Geometry: M3FD, a Large-Scale Dataset and Benchmark for Multimodal 3D Perceptual Understanding-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Huan and Chen, Ping and Chen, Zezhou and Liu, Zhaoxiang and Wang, Zipeng and Liu, Xiang and Wang, Xin and Wang, Kai and Lian, Shiguo},
  title     = {Beyond {3D} Geometry: {M3FD}, a Large-Scale Dataset and Benchmark for Multimodal {3D} Perceptual Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1986--1995},
}
CoVCR: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinyu and Sun, Shiliang},
  title     = {{CoVCR}: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9065--9074},
}
GEODE: Geometry-Guided Discrete Diffusion for Open-Vocabulary 3D Scene Graph Generation-
[pdf]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Changqun and Yin, Wangxiandi and Hu, Xin and Zhao, Lei and Zhang, Dongyang and He, Tao},
  title     = {{GEODE}: Geometry-Guided Discrete Diffusion for Open-Vocabulary {3D} Scene Graph Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7143--7153},
}
FedCVC: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Jinshan and Huang, Tingxuan and Jiang, Baoyang and Xiang, Liuyu and Ma, Qiang and Hu, Jianwei},
  title     = {{FedCVC}: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2514--2523},
}
DSA: Dynamic Step Allocation for Fast Autoregressive Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Le_2026_CVPR,
  author    = {Le, Thanh-Tung and Zhao, Yunhan and Chai, Menglei and Shen, Zhengyang and Cao, Zhe and Tang, Danhang and Xie, Xiaohui and Kong, Deying},
  title     = {{DSA}: Dynamic Step Allocation for Fast Autoregressive Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4334--4344},
}
Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawachi_2026_CVPR,
  author    = {Kawachi, Hodaka and Nakamura, Tomoya and Santo, Hiroaki and Tedla, SaiKiran Kumar and Canham, Trevor D. and Yagi, Yasushi and Brown, Michael S.},
  title     = {Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1273--1282},
}
If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barbano_2026_CVPR,
  author    = {Barbano, Carlo Alberto and Molinaro, Luca and Ciranni, Massimiliano and Aiello, Emanuele and Pastore, Vito Paolo and Grangetto, Marco},
  title     = {If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6122--6132},
}
IRDINO: Adapting DINOv3 with Second-Order Motion Awareness for Moving Infrared Small Target Detection-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qian and Fan, Shuaipeng and Gao, Fei and Zhang, Mingjin},
  title     = {{IRDINO}: Adapting {DINOv3} with Second-Order Motion Awareness for Moving Infrared Small Target Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8408--8418},
}
AvatarMix: Identity-Preserving Cross-Avatar Composition for Outfit Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhaorong and Kanamori, Yoshihiro and Endo, Yuki},
  title     = {{AvatarMix}: Identity-Preserving Cross-Avatar Composition for Outfit Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {425--435},
}
DeepFakeShield: A Proactive Defense Against Malicious Face Swapping-
[pdf]
[supp]
[bibtex]@InProceedings{Karimi-Bidhendi_2026_CVPR,
  author    = {Karimi-Bidhendi, Saeed and DeGol, Joseph and Wengrowski, Eric and Roberts, Dominic and Dana, Kristin},
  title     = {{DeepFakeShield}: A Proactive Defense Against Malicious Face Swapping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {867--877},
}
Multimodal Reasoning with Explicit Reasoning Patterns and Rewards-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Han and Jin, Sheng and Zuo, Zhongrong and Wang, Ziyue and She, Qi and Shao, Ling and Lu, Shijian},
  title     = {Multimodal Reasoning with Explicit Reasoning Patterns and Rewards},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9542--9551},
}
HAIT: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Liangjie and Wenjie, Liao and Feng, Ming and Song, Xiaohui and Li, Huafei and Lu, Haonan},
  title     = {{HAIT}: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6072--6079},
}
Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised MRI Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Ji and Peng, Bo and Li, Suping and Zhang, Qianni},
  title     = {Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised {MRI} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5620--5631},
}
Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zheyuan and Zhao, Qingsong and Wang, Yusong and Huang, Zhaohong and Li, Xinqi and Yuan, Chen and Shao, Jiawei and Zhang, Chi and Li, Xuelong},
  title     = {Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8585--8595},
}
FREE-Switch: Frequency-Based Dynamic LoRA Switch for Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Shenghe and Zhang, Minyu and Liu, Tianhao and Wang, Hongzhi},
  title     = {{FREE-Switch}: Frequency-Based Dynamic {LoRA} Switch for Style Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2793--2802},
}
VEBench: Benchmarking Large Multimodal Models for Real-world Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Andong and Du, Dawei and Chen, Zhenfang and Zhong, Wen and Chen, Fan and Chen, Guang and Kuo, Chia-Wen and Wen, Longyin and Chen, Chen and Zhu, Sijie},
  title     = {{VEBench}: Benchmarking Large Multimodal Models for Real-world Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2187--2196},
}
HarmoniDiff-RS: Training-Free Diffusion Harmonization for Satellite Image Composition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xiaoqi and Dos Santos, Jefersson A. and Han, Jungong},
  title     = {{HarmoniDiff-RS}: Training-Free Diffusion Harmonization for Satellite Image Composition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6352--6360},
}
Qinling-GFFE: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhenhe and Cao, Congqi and Hu, Lanshu and Pan, Liujie},
  title     = {{Qinling-GFFE}: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2554--2563},
}
Vote-in-Context: VLMs as Explainable Zero-Shot Rank Fusers-
[pdf]
[supp]
[bibtex]@InProceedings{Eltahir_2026_CVPR,
  author    = {Eltahir, Mohamed and Habibullah, Ali and Ayash, Lama and Hussain, Tanveer and Khan, Naeemullah},
  title     = {Vote-in-Context: {VLMs} as Explainable Zero-Shot Rank Fusers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6496--6505},
}
Harmonized Multi-Layer Text-to-Image Generation with Generative Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dalva_2026_CVPR,
  author    = {Dalva, Yusuf and Li, Yijun and Liu, Qing and Zhao, Nanxuan and Zhang, Jianming and Lin, Zhe and Yanardag, Pinar},
  title     = {Harmonized Multi-Layer Text-to-Image Generation with Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8471--8480},
}
ShelfGaussian: Shelf-Supervised Open-Vocabulary Gaussian-Based 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lingjun and Luo, Yandong and Hays, James and Gan, Lu},
  title     = {{ShelfGaussian}: Shelf-Supervised Open-Vocabulary {Gaussian}-Based {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1398--1409},
}
PEPR: Privileged Event-based Predictive Regularization for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magrini_2026_CVPR,
  author    = {Magrini, Gabriele and Becattini, Federico and Biondi, Niccol{\`o} and Pala, Pietro},
  title     = {{PEPR}: Privileged Event-based Predictive Regularization for Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3209--3219},
}
PCSTracker: Long-term Scene Flow Estimation for Point Cloud Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Min and Xu, Gangwei and Wang, Xianqi and Peng, Yuyi and Yang, Xin},
  title     = {{PCSTracker}: Long-term Scene Flow Estimation for Point Cloud Sequences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4920--4930},
}
Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Xinpeng and Zhang, Min and Han, Kairong and Tan, Xu and Wu, Fei and Kuang, Kun},
  title     = {Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6100--6110},
}
DRA: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting-
[pdf]
[bibtex]@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Minwei and Wei, Yang and Xiao, Junhao and Bi, Xiuli and Xiao, Bin},
  title     = {{DRA}: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {779--788},
}
CLLAP: Contrastive Learning-based LiDAR-Augmented Pretraining for Enhanced Radar-Camera Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Bingyi and Zhu, Chuanhui and Xue, Hongfei and Teng, Jian and Liu, Jipeng and Wang, Enshu and Dai, Penglin and Wang, Pu},
  title     = {{CLLAP}: Contrastive Learning-based {LiDAR}-Augmented Pretraining for Enhanced Radar-Camera Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {99--108},
}
100Editor: 100+ Views per Batch and Minute-Scale View-Consistent 3D Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Cunqi and Zhou, Peng and Qin, Jie and Tian, Qi},
  title     = {{100Editor}: 100+ Views per Batch and Minute-Scale View-Consistent {3D} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8449--8460},
}
Devil is in Narrow Policy: Unleashing Exploration in Driving VLA Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Canyu and Yang, Yuguang and Tan, Zhewen and Wang, Yizhi and Zhan, Ruiyi and Liu, Haiyan and Mao, Xuanyao and Bao, Jason and Tang, Xinyue and Yang, Linlin and Sun, Bingchuan and Wang, Yan and Zhang, Baochang},
  title     = {Devil is in Narrow Policy: Unleashing Exploration in Driving {VLA} Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1062--1072},
}
APC: Transferable and Efficient Adversarial Point Counterattack for Robust 3D Point Cloud Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Geunyoung and Kim, Soohong and Kong, Inseok and Jung, Jiyoung},
  title     = {{APC}: Transferable and Efficient Adversarial Point Counterattack for Robust {3D} Point Cloud Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {789--798},
}
R2MoE: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data-
[pdf]
[supp]
[bibtex]@InProceedings{Raza_2026_CVPR,
  author    = {Raza, Wajih Hassan and Schiess, Mya and Lemus, Juan Martinez and Ellmore, Timothy Michael and Green, Charles and Soto, Claudio and Fu, Xin and Hu, Renjie},
  title     = {{R2MoE}: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5776--5785},
}
Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Borgavi_2026_CVPR,
  author    = {Borgavi, Kaustubh R. and Shashikumar, Sarvesh and Arora, Chetan},
  title     = {Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6570--6581},
}
TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Belagali_2026_CVPR,
  author    = {Belagali, Varun and Kapse, Saarthak and Marza, Pierre and Das, Srijan and Li, Zilinghan and Boutaj, Sofi{\`e}ne and Pati, Pushpak and Yellapragada, Srikar and Nandi, Tarak Nath and Madduri, Ravi K. and Saltz, Joel and Prasanna, Prateek and Christodoulidis, Stergios and Vakalopoulou, Maria and Samaras, Dimitris},
  title     = {{TICON}: A Slide-Level Tile Contextualizer for Histopathology Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5440--5451},
}
CLASH: A Benchmark for Cross-Modal Contradiction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Popordanoska_2026_CVPR, author = {Popordanoska, Teodora and Li, Jiameng and Blaschko, Matthew B.}, title = {{CLASH}: A Benchmark for Cross-Modal Contradiction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6051--6061} }
Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yinbo and Wu, Qi and Ye, Keyang and He, Xiao and Tian, Tian}, title = {Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8492--8503} }
GeoHOI: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Ziyi and Rao, Zejing and Cao, Juan and Liu, Xiaoqiang and Fang, Zhixue and Zhang, Haoxian and Tang, Songlin and Tang, Fan}, title = {{GeoHOI}: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3739--3748} }
UniD-Shift: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuai and Shi, Zhecheng and Li, Zhuoxiao and Ou, Jing and Wang, Tengxi and Liu, Yuan and Zhao, Wufan}, title = {{UniD-Shift}: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6382--6393} }
Face Time Traveller : Travel Through Ages Without Losing Identity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kar_2026_CVPR, author = {Kar, Purbayan and Ghadiya, Ayush and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.}, title = {Face Time Traveller : Travel Through Ages Without Losing Identity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8756--8765} }
Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yexiong and Yao, Yu and Zhou, Yang and Liu, Tongliang}, title = {Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3955--3964} }
Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Cong and Cheng, Gong}, title = {Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3040--3049} }
CADRNet: Cognitively-Inspired Active Vision for 3D Reasoning Segmentation via Differentiable Rendering-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Zai Yang and Wang, Changshuo and Shi, Yuan and Sun, Linjun and Wei, Shu and Wang, Tingran and Wu, Wangyu and Li, Yanjie and Li, Weijun}, title = {{CADRNet}: Cognitively-Inspired Active Vision for {3D} Reasoning Segmentation via Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7220--7230} }
Prompt-driven Small Object Instance Segmentation in Earth Observation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenhao and Ji, Yingrui and Meng, Yu and Zhang, Yunjian and Zhu, Yao}, title = {Prompt-driven Small Object Instance Segmentation in {Earth} Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7347--7356} }
Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yachan and Lu G{\'o}mez, Jose and Xue, Danna and Xiao, Yi and L{\'o}pez, Antonio M.}, title = {Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3231--3240} }
Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhiqiang and Tao, Renshuai and Zhang, Chunjie and Yang, Guodong and Zheng, Xiaolong and Zhao, Yao}, title = {Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8090--8100} }
AR4D: Autoregressive 4D Generation from Monocular Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Hanxin and He, Tianyu and Chen, Zhibo}, title = {{AR4D}: Autoregressive {4D} Generation from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {88--98} }
HelixTrack: Event-Based Tracking and RPM Estimation of Propeller-like Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Spetlik_2026_CVPR, author = {Spetlik, Radim and Pliska, Michal and Vrba, Vojt{\v{e}}ch and Matas, Ji{\v{r}}{\'\i}}, title = {{HelixTrack}: Event-Based Tracking and {RPM} Estimation of Propeller-like Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3200--3208} }
OmniDrive-R1: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhenguo and Zheng, Haohan and Wang, Yishen and Xu, Le and Deng, Tianchen and Chen, Xuefeng and Chen, Qu and Zhang, Bo and Huang, Wuxiong}, title = {{OmniDrive-R1}: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1106--1116} }
OminiMAG-SLAM : Unified Online Dual Graph Optimization for Multi-Agent Gaussian SLAM-
[pdf]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Leqian and Li, Caibo and Guo, Yu and Wang, Fei}, title = {{OminiMAG-SLAM} : Unified Online Dual Graph Optimization for Multi-Agent {Gaussian} {SLAM}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1431--1440} }
Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Wang, Qinian and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi}, title = {Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9248--9258} }
Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Ziyao and Zhang, Kaipeng and Shieh, Michael Qizhe}, title = {Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1230--1239} }
PEARL: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Maleki_2026_CVPR, author = {Maleki, Armin and Radha, Hayder}, title = {{PEARL}: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1138--1147} }
Towards Efficient Multimodal Unified Reasoning Model via Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Qixiang and Yao, Huanjin and Chen, Jianghao and Huang, Jiaxing and Zhao, Zhicheng and Su, Fei}, title = {Towards Efficient Multimodal Unified Reasoning Model via Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9378--9388} }
UNIFORM: Unifying Knowledge from Large-scale and Diverse Pre-trained Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yimu and Zhuang, Weiming and Chen, Chen and Huang, Jiabo and Li, Jingtao and Lyu, Lingjuan}, title = {{UNIFORM}: Unifying Knowledge from Large-scale and Diverse Pre-trained Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2904--2914} }
Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hassani_2026_CVPR, author = {Hassani, Ali and Zhou, Fengzhe and Kane, Aditya and Huang, Jiannan and Chen, Chieh-Yun and Shi, Min and Walton, Steven and Hoehnerbach, Markus and Thakkar, Vijay and Isaev, Mikhail and Zhang, Qinsheng and Xu, Bing and Wu, Haicheng and Hwu, Wen-mei and Liu, Ming-Yu and Shi, Humphrey}, title = {Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3009--3018} }
SciGA: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawada_2026_CVPR, author = {Kawada, Takuro and Kitada, Shunsuke and Nemoto, Sota and Iyatomi, Hitoshi}, title = {{SciGA}: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2250--2260} }
All-Age Human Mesh Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Bravo-Sanchez_2026_CVPR, author = {Bravo-S{\'a}nchez, Laura and Armando, Matthieu and Br{\'e}gier, Romain and Rogez, Gr{\'e}gory and Yeung-Levy, Serena and Baradel, Fabien}, title = {All-Age Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3677--3687} }
InEdit-Bench: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Zhiqiang and Han, Xumeng and Zhang, Zhiwei and Xiong, Zenghui and Ding, Yifan and Ping, Aoxiang and Li, Xiang and Guo, Tong and Mao, Yao}, title = {{InEdit-Bench}: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2176--2186} }
LlamaRG: A Multi-View Large Language Model for Radiology Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jayas_2026_CVPR, author = {Jayas, Tanuja and Rastogi, Aditya and Raghavan, Pavithra and Brugnara, Gianluca and Schlamp, Kai and Foltyn-Dumitru, Martha and Vollmuth, Philipp}, title = {{LlamaRG}: A Multi-View Large Language Model for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5745--5754} }
LED: LLM Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yang and Zhao, Shiyu and Chen, Yuxiao and Wang, Zhenting and Jin, Can and Zhao, Mingyu and Metaxas, Dimitris N.}, title = {{LED}: {LLM} Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9643--9653} }
Hierarchical Textual Knowledge for Enhanced Image Clustering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yijie and Gao, Yunfan and Jiang, Weipeng and Wang, Haofen}, title = {Hierarchical Textual Knowledge for Enhanced Image Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9749--9758} }
Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Nanxiang and Fan, Zhaoxin and Kang, Enhan and Gao, Daiheng and Zhou, Yun and Chang, Yanxia and Zhu, Zheng and Jin, Yeying and Wu, Wenjun}, title = {Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8080--8089} }
RiGS: Rigid-aware 4D Gaussian Splatting from a Single Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chenyu and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter}, title = {{RiGS}: Rigid-aware {4D} {Gaussian} Splatting from a Single Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {547--557} }
Analyzing and Enhancing Visual Learning in LLM-based Radiology Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zailong and Gao, Peng and Barthelemy, Johan and Zhou, Luping and Wang, Lei}, title = {Analyzing and Enhancing Visual Learning in {LLM-based} Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9327--9336} }
Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Wenxuan and Zhao, Yanjun and Yang, Xiyuan and He, Jingrui}, title = {Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9632--9642} }
JetViT: Efficient High-Resolution Vision Transformer with Post-Training Attention Search-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Dongyun and Zhang, Zhuoyang and Chen, Junyu and He, Wenkun and Peng, Qinhe and Ye, Hanrong and Lu, Yao and Yin, Hongxu and Wang, Yu and Han, Song and Cai, Han}, title = {{JetViT}: Efficient High-Resolution Vision Transformer with Post-Training Attention Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2957--2967} }
Three-Step Conditional Diffusion 3D Reconstruction for Light-Field Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qihong and Yan, Shaokang and Qiao, Zhimin and Wang, Jinjia and Xiong, Bo}, title = {Three-Step Conditional Diffusion {3D} Reconstruction for Light-Field Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {478--487} }
ECOC-IL: Robust and Efficient Label LDP for Imbalanced Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengyang and Wu, Ou}, title = {{ECOC-IL}: Robust and Efficient Label {LDP} for Imbalanced Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7914--7923} }
Detecting Precise Hand Touch Moments in Egocentric Video-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Huy Anh and Dayoub, Feras and Hoai, Minh}, title = {Detecting Precise Hand Touch Moments in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3565--3574} }
Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Chenhao and Zhu, Yichen and Wen, Junjie and Chen, Yefei and Liu, Ziang and Fang, Faming}, title = {Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1388--1397} }
FedVG: Gradient-Guided Aggregation for Enhanced Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Devkota_2026_CVPR, author = {Devkota, Alina and Thrasher, Jacob and Adjeroh, Donald and Bhattarai, Binod and Gyawali, Prashnna k.}, title = {{FedVG}: Gradient-Guided Aggregation for Enhanced Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2803--2812} }
Open World Image Aesthetic Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Mingxiang and Ma, Tianren and Zhang, Xijin}, title = {Open World Image Aesthetic Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9791--9801} }
CETCam: Camera-Controllable Video Generation via Consistent and Extensible Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zelin and Gong, Xinyu and Liu, Bangya and Song, Ziyang and Zhang, Jun and Wu, Suhui and Chen, Yongxin and Zhang, Hao}, title = {{CETCam}: Camera-Controllable Video Generation via Consistent and Extensible Tokenization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4749--4758} }
Rethinking Training Dynamics in Scale-Wise Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Gengze and Ge, Chongjian and Tan, Hao and Liu, Feng and Hong, Yicong}, title = {Rethinking Training Dynamics in Scale-Wise Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4593--4602} }
Scene-Level Heterogeneous Physics Simulation with 3D Gaussian Splats-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaoyang and Wu, Shangzhe and Han, Kai}, title = {Scene-Level Heterogeneous Physics Simulation with {3D} {Gaussian} Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6456--6465} }
Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Yizheng and Wang, Xiaoyang and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin}, title = {Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7665--7674} }
Asymmetric Collaborative Distillation for Asymmetric Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yi and Zhang, Huaidong and Luo, Xuandi and Zhou, Yan and He, Shengfeng}, title = {Asymmetric Collaborative Distillation for Asymmetric Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6706--6716} }
iTCTSL: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2026_CVPR, author = {Mu, Pan and Zhu, Yuchao and Zhang, Shiqi and Yan, Hanting and Zhang, Jinglin and Bai, Cong}, title = {{iTCTSL}: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1587--1596} }
Native3D: End-to-End 3D Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yibo and Zhang, Ziwei and Pang, Haozhou and Li, Menghao and He, Lanshan and Qi, Gan}, title = {{Native3D}: End-to-End {3D} Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {381--390} }
OTPrune: Distribution-Aligned Visual Token Pruning Via Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiwen and Zhu, Wenhui and Li, Gen and Dong, Xuanzhao and Xiong, Yujian and Wang, Hao and Qiu, Peijie and Song, Qingquan and Wang, Zhipeng and Tang, Shao and Wang, Yalin and Razi, Abolfazl}, title = {{OTPrune}: Distribution-Aligned Visual Token Pruning Via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5849--5859} }
Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuohao and Li, Zeng and Zhang, Yifei and Liu, Chang and Zhou, Yu}, title = {Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1577--1586} }
Consistent Video Editing as Flow-Driven Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ge and Fan, Songlin and Liu, Hangxu and Song, Quanjian and Wang, Hewei and Xu, Jinfeng}, title = {Consistent Video Editing as Flow-Driven Image-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4625--4634} }
Retrieval-VLA: Training-Free In-Context Adaptation for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yue and Wang, Rui and Lin, Jiehong and Wang, Zhongrui and Qi, Xiaojuan}, title = {{Retrieval-VLA}: Training-Free In-Context Adaptation for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1358--1367} }
SwiftVGGT: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jungho and Lee, Minhyeok and Yang, Sunghun and Kang, Minseok and Lee, Sangyoun}, title = {{SwiftVGGT}: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {447--456} }
DMin: Scalable Training Data Influence Estimation for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Huawei and Lao, Yingjie and Zhao, Weijie}, title = {{DMin}: Scalable Training Data Influence Estimation for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3293--3302} }
Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Weijian and Sun, Shizhao and Yu, Tianyu and Wang, Ruiyu and Chua, Tat-Seng and Bian, Jiang}, title = {Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8919--8929} }
Adapting Large VLMs with Iterative and Manual Instructions for Generative Low-light Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xiaoran and Wang, Liyan and Jin, Yeying and Lam, Kin-man and Su, Zhixun and Yang, Yang and Pan, Jinshan and Wang, Cong}, title = {Adapting Large {VLMs} with Iterative and Manual Instructions for Generative Low-light Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4832--4842} }
Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels-
[pdf]
[supp]
[bibtex]@InProceedings{Pilligua_2026_CVPR, author = {Pilligua, Maria and Serrano-Lozano, David and Peng, Pai and Baldrich, Ramon and Brown, Michael S. and Vazquez-Corral, Javier}, title = {Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5014--5023} }
Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2026_CVPR, author = {Jeon, Inseok and Lee, Minhyeok and Lee, Seunghoon and Kang, Minseok and Cho, Suhwan and Lee, Sangyoun}, title = {Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4266--4275} }
ForenDeX: Unlocking Forensic Insights for Explainable AI-Generated Image Detection-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Chuangchuang and Wang, Jinglu and Ming, Xiang and Tao, Renshuai and Wei, Yunchao and Zhao, Yao and Lu, Yan}, title = {{ForenDeX}: Unlocking Forensic Insights for Explainable {AI-Generated} Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6592--6601} }
Gaze into the Details: Locality-Sensitive Enhancement for OCTA Retinal Vessel Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Tuopusen and Ma, Ding and Wu, Xiangqian}, title = {Gaze into the Details: Locality-Sensitive Enhancement for {OCTA} Retinal Vessel Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5409--5418} }
Affine Bases for Affine Spaces-
[pdf]
[supp]
[bibtex]@InProceedings{Dogadov_2026_CVPR, author = {Dogadov, Gabriel and Alexa, Marc}, title = {Affine Bases for Affine Spaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {213--222} }
DELRER: Disease Evolution-Informed Longitudinal Radiology Report Generation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kaiyu and Wang, Bing and Li, Changchun and Lu, You and Wang, Yaning and Zhang, Huimao and Li, Ximing}, title = {{DELRER}: Disease Evolution-Informed Longitudinal Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5357--5367} }
A2Z-10M+: Geometric Deep Learning with A-to-Z BRep Annotations for AI-Assisted CAD Modeling and Reverse Engineering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jena_2026_CVPR, author = {Jena, Pritham K and Baburaj, Bhavika and Anand, Tushar and Dutta, Vedant and Ulavala, Vineeth and Ali, Sk Aziz}, title = {{A2Z-10M+}: Geometric Deep Learning with {A-to-Z} {BRep} Annotations for {AI-Assisted} {CAD} Modeling and Reverse Engineering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1913--1923} }
CoRT-Predictor: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yanlin and Liu, Yuchen and Liu, Mingren}, title = {{CoRT-Predictor}: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1117--1127} }
THOM: Generating Physically Plausible Hand-Object Meshes From Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Uyoung and Tiruneh, Yihalem Yimolal and Chang, Hyung Jin and Baek, Seungryul and Kim, Kwang In}, title = {{THOM}: Generating Physically Plausible Hand-Object Meshes From Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3653--3664} }
Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shoby_2026_CVPR, author = {Shoby, Abin and Huy, Ta Duc and Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and van den Hengel, Anton and Le Nguyen, Phi and Verjans, Johan W. and Phan, Vu Minh Hieu}, title = {Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9185--9194} }
A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation-
[pdf]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Xinpan and Huang, Mingzhu and Hua, Liujie and Ju, Jianuo and Zhao, Xiaowei and Wu, Lin Yuanbo}, title = {A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5545--5555} }
Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhongzhen and Mu, Linjie and Gu, Yannian and Hu, Kangzhe and Hua, Shengyi and Zhang, Xiaofan}, title = {Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5609-5619} }
The DeepSpeak Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barrington_2026_CVPR, author = {Barrington, Sarah and Bohacek, Maty and Farid, Hany}, title = {The DeepSpeak Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1893-1902} }
PRADA: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Damm_2026_CVPR, author = {Damm, Simon and Ricker, Jonas and Petzka, Henning and Fischer, Asja}, title = {PRADA: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6506-6516} }
ELSA: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsu_2026_CVPR, author = {Hsu, Chih-Chung and Ma, Xin-Di and Liao, Wo-Ting and Lee, Chia-Ming}, title = {ELSA: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2988-2997} }
Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yan and Liu, Kun and Li, Min}, title = {Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5505-5514} }
RealDiffusion: Physics-informed Attention for Multi-character Storybook Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qi and Chen, Jun and Tsang, Ivor and Dai, Guang}, title = {RealDiffusion: Physics-informed Attention for Multi-character Storybook Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4698-4707} }
BLEG: LLM Functions as Powerful fMRI Graph-Enhancer for Brain Network Analysis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Rui and Wang, Zitong and Li, Jiaxing and Zheng, Weihuang and Kong, Youyong}, title = {BLEG: LLM Functions as Powerful fMRI Graph-Enhancer for Brain Network Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5662-5672} }
Self-Guided Integrated Gradient Method for Attribution-
[pdf]
[supp]
[bibtex]@InProceedings{Henry_2026_CVPR, author = {Henry, Sabrina and Ruget, Alice and Scholes, Stirling and Leach, Jonathan}, title = {Self-Guided Integrated Gradient Method for Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3312-3321} }
VHOI: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wanyue and Foo, Lin Geng and Beeler, Thabo and Dabral, Rishabh and Theobalt, Christian}, title = {VHOI: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4009-4021} }
When Interpretability Becomes a Liability: Adversarial Attacks on CBM Concept Layers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sridhar_2026_CVPR, author = {Sridhar, Aditya}, title = {When Interpretability Becomes a Liability: Adversarial Attacks on CBM Concept Layers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {829-836} }
Large Multimodal Models as General In-Context Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garosi_2026_CVPR, author = {Garosi, Marco and Farina, Matteo and Conti, Alessandro and Mancini, Massimiliano and Ricci, Elisa}, title = {Large Multimodal Models as General In-Context Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6727-6736} }
UnfoldIR: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Chunming and Zhang, Rihan and Xiao, Fengyang and Fang, Chengyu and Tang, Longxiang and Zhang, Rui and Farsiu, Sina}, title = {UnfoldIR: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5003-5013} }
Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Purohit_2026_CVPR, author = {Purohit, Vishal and Chen, Wei and Qiu, Qiang}, title = {Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1263-1272} }
UniTalking: A Unified Audio-Video Framework for Talking Portrait Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hebeizi and Sun, Benyuan and Yang, Yi and Liang, Zihao and Yin, Zihao and Sha, Xiao and Wang, Chenliang}, title = {UniTalking: A Unified Audio-Video Framework for Talking Portrait Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4647-4656} }
CADReasoner: Iterative Program Editing for CAD Reverse Engineering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kabisov_2026_CVPR, author = {Kabisov, Soslan and Kirichuk, Vsevolod and Volkov, Andrey and Barannikov, Marina and Savrasov, Gennadiy and Konushin, Anton and Kuznetsov, Andrey and Zhemchuzhnikov, Dmitrii}, title = {CADReasoner: Iterative Program Editing for CAD Reverse Engineering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6143-6153} }
MoVieDrive: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Guile and Huang, David and Bai, Dongfeng and Liu, Bingbing}, title = {MoVieDrive: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4290-4299} }
S^2DiT: Sandwich Diffusion Transformer for Mobile Streaming Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Lin and Wu, Yushu and Lebedev, Aleksei and Lahiri, Dishani and Dong, Meng and Sahni, Arpit and Vasilkovsky, Michael and Chen, Hao and Hu, Ju and Siarohin, Aliaksandr and Tulyakov, Sergey and Wang, Yanzhi and Kag, Anil and Li, Yanyu}, title = {S{\textasciicircum}2DiT: Sandwich Diffusion Transformer for Mobile Streaming Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4355-4365} }
Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xuepei and Feng, Mingtao and Dong, Weisheng and Chen, Lin and Feng, Jie and Wu, Fangfang and Zhu, Yufan and Mian, Ajmal Saeed}, title = {Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {942-951} }
VeCoR -- Velocity Contrastive Regularization for Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Zong-Wei and Li, Jing-Lun and Li, Lin-Ze and Zhang, Shen and Tang, Yao}, title = {VeCoR -- Velocity Contrastive Regularization for Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4739-4748} }
Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2026_CVPR, author = {Shin, Jeongwan and Kim, Jaehyeon and Ko, Donguk and Choi, Jaeho}, title = {Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2208-2219} }
FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Chaoyi and Wang, Run and Luo, Feng and Pes\'e, Mert D. and Fan, Zhiwen and Zhong, Yiqi and Huang, Siyu}, title = {FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {129-138} }
HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soroco_2026_CVPR, author = {Soroco, Mauricio and Pittaluga, Francesco and Tasneem, Zaid and Aich, Abhishek and Zhuang, Bingbing and Chen, Wuyang and Chandraker, Manmohan and Jiang, Ziyu}, title = {HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {952-959} }
Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Yiqing and Li, Chenjia and Unberath, Mathias}, title = {Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3945-3954} }
GReD-RSITR: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shuhuai and Pei, Songwei and Liu, Bingfeng and Huang, Yuanzhou and Li, Qian and Wang, Shangguang}, title = {GReD-RSITR: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6312-6321} }
Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiyang and Liu, Rui and Dai, Hang}, title = {Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6612-6621} }
OminPSD: Layered PSD Generation with Diffusion Transformer-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Cheng and Song, Yiren and Wang, Haofan and Shou, Mike Zheng}, title = {OminPSD: Layered PSD Generation with Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4190-4201} }
CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Pingrui and Zhou, Yanshan and Xie, Zihao and Yang, Hua}, title = {CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2197-2207} }
Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shihao and Huang, Huaibo and Zheng, Aihua and Tang, Jin and He, Ran}, title = {Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6560-6569} }
Less is More: Multimodal Human Pose Estimation with Selective Fusion-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yutong and Huang, Qianyi and Chen, Xu}, title = {Less is More: Multimodal Human Pose Estimation with Selective Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3575-3584} }
DenoiseGS: Gaussian Reconstruction Model for Burst Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yongsen and Cai, Yuanhao and Zhang, Yulun}, title = {DenoiseGS: Gaussian Reconstruction Model for Burst Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5096-5105} }
Video4Spatial: Towards Visuospatial Intelligence with Context-Guided Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Zeqi and Zhao, Yiwei and Li, Lingxiao and Lan, Yushi and Yu, Ning and Garg, Rahul and Taghavi, Mohammad H. and Pan, Xingang}, title = {Video4Spatial: Towards Visuospatial Intelligence with Context-Guided Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3933-3944} }
Optical Tolerance-Compensated Diffusion Model for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Hongji and Gong, Huihui and Zuo, Tanli and Zhao, Yu and Dai, Jin and Tian, Jingduo and Ni, Kai}, title = {Optical Tolerance-Compensated Diffusion Model for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5064-5074} }
EI: Early Intervention for Multimodal Imaging Based Disease Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Qijie and Lin, HaiLan and Li, Xirong}, title = {EI: Early Intervention for Multimodal Imaging Based Disease Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5632-5640} }
Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jeonghwan and Kim, Wontaek and Lu, Yidan and Cheng, Jin and Zargarbashi, Fatemeh and Zeng, Zicheng and Qi, Zekun and Dou, Zhiyang and Sontakke, Nitish and Baek, Donghoon and Yi, Li and Ha, Sehoon and Li, Tianyu}, title = {Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1421-1430} }
Visual Reasoning Through Tool-Supervised Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Qihua and Sahin, Gozde and Wang, Pei and Cai, Zhaowei and Shrestha, Robik and Yang, Hao and Modolo, Davide}, title = {Visual Reasoning Through Tool-Supervised Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8993-9002} }
Cross-Resolution Diffusion Models Via Network Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Jiaxuan and Zhu, Junhan and Wang, Huan}, title = {Cross-Resolution Diffusion Models Via Network Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4224-4233} }
GR-Diffusion: Graph-Guided Relational-Aware Diffusion via Attention Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaochen and Xi, Xiaoting and Yin, Chao and Li, Xiaoqiang and Dong, Daoguo}, title = {GR-Diffusion: Graph-Guided Relational-Aware Diffusion via Attention Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3759-3768} }
DM^3T: Harmonizing Modalities via Diffusion for Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiran and Liu, Yeqiang and Wei, Yijie and Han, Mina and Guo, Qiannan and Li, Zhenbo}, title = {DM{\textasciicircum}3T: Harmonizing Modalities via Diffusion for Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8398-8407} }
When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yanhui and Zhou, Qi and Xu, Zhihong and Guo, Huizhong and Wang, Wenhai and Wang, Dongxia}, title = {When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2038-2048} }
One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuan and Chen, Chen and Gu, Jiatao}, title = {One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4688-4697} }
Memory-efficient Continual Learning with Prototypical Exemplar Condensation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, M.-Duong and Dao, Thien-Thanh and Nguyen, Le-Tuan and Le, Dung D. and Wong, Kok-Seng}, title = {Memory-efficient Continual Learning with Prototypical Exemplar Condensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7675-7685} }
Eevee: Towards Close-up High-resolution Video-based Virtual Try-on-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Jianhao and Bai, Yancheng and Chen, Ruidong and Zhang, Xuanpu and Sun, Lei and Jin, Dongyang and Xu, Ryan and Zhang, Nannan and Song, Dan and Chu, Xiangxiang}, title = {Eevee: Towards Close-up High-resolution Video-based Virtual Try-on}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4614-4624} }
Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yujie and Zhang, Hu and Liang, Jiye and Wang, Zhiqiang and Tan, Hongye and Li, Ru}, title = {Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9282-9293} }
See Tomorrow, Act Today: Foresight-Driven Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bozhou and Song, Nan and Wang, Yuang and Deng, Jiankang and Zhu, Xiatian and Zhang, Li}, title = {See Tomorrow, Act Today: Foresight-Driven Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1180-1190} }
CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhenyu and Shi, Tengfei and Wang, Xuehao and Li, Ming and Chen, Chenglizhao and Song, Wenfeng and Hao, Aimin}, title = {CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7241-7250} }
NumeriKontrol: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zhenyu and Shen, Xiaoqi and Nan, Haotian and Zhang, Xinyu}, title = {NumeriKontrol: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4389-4399} }
Direct Language Embedding Enables Gaussian Splatting for Large Scenes-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhida and Zhu, Jianqiao and Huang, Hejin and Qin, Yipeng and Yang, Sibei and Li, Guanbin}, title = {Direct Language Embedding Enables Gaussian Splatting for Large Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7231-7240} }
GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Wan, Liang and Feng, Wei}, title = {GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1872-1882} }
AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Jianheng and He, Jingyu and Fan, Kejia and He, Run and Wang, Jingchao and Liu, Anfeng and Song, Houbing Herbert and Wang, Leye and Zhu, Zhanxing and Zhuang, Huiping and Liu, Yunhuai}, title = {AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7768-7778} }
DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering-
[pdf]
[supp]
[bibtex]@InProceedings{Araneda_2026_CVPR, author = {Araneda, Guillermo Figueroa and Jimenez, Iris Dania and Hofherr, Florian and Ko, Manny and Andrade-Loarca, Hector and Cremers, Daniel}, title = {DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8461-8470} }
AceMIL: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shijie and Chen, Yiming and Gong, Yingyun and Zhou, Hongwen and Chen, Feng-Jung and Gao, Xieping and Chen, Zhineng}, title = {AceMIL: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5336-5346} }
SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Meng and Li, Xingyu and Liu, Xue and Reid, Ian and Liang, Xiaodan}, title = {SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7176-7187} }
Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mingjie and Kim, Edward and Zhao, Yue and Adeli, Ehsan and Pohl, Kilian M.}, title = {Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1211-1220} }
One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Darur_2026_CVPR, author = {Darur, Balaji and Garg, Amanmeet and Tapaswi, Makarand}, title = {One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8268-8279} }
DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Okazaki_2026_CVPR, author = {Okazaki, Soichiro and Sasaki, Tatsuya and Ohashi, Hiroki}, title = {DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6890-6900} }
EggHand: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Jaeyoung and Kim, Hyeondong and Kim, Yujin and Park, Daehee}, title = {EggHand: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3521-3531} }
RoadTones: Tone Controllable Text Generation from Road Event Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parikh_2026_CVPR, author = {Parikh, Chirag and Lipare, Siddhi Pravin and Sarvadevabhatla, Ravi Kiran}, title = {RoadTones: Tone Controllable Text Generation from Road Event Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1019-1028} }
SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Inadumi_2026_CVPR, author = {Inadumi, Shun and Tanaka, Shohei and Hirasawa, Tosho and Hashimoto, Atsushi and Yoshino, Koichiro and Ushiku, Yoshitaka}, title = {SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2131-2141} }
DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yang and Xu, Kai and Hou, Junyao and Zhang, Miao and Li, Xiang and Chen, Zhenghua and Gao, Yingxue and Wu, Min}, title = {DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7789-7798} }
SemanticMoments: Training-Free Motion Similarity via Third Moment Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huberman_2026_CVPR, author = {Huberman, Saar and Goldberg, Kfir and Patashnik, Or and Benaim, Sagie and Mokady, Ron}, title = {SemanticMoments: Training-Free Motion Similarity via Third Moment Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8419-8428} }
Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ziwen_2026_CVPR, author = {Ziwen, Chen and Tan, Hao and Wang, Peng and Xu, Zexiang and Fuxin, Li}, title = {Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {370-380} }
VisionCreator: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Jinxiang and Lu, Zexin and He, Jiajun and Quan, Rongwei and Zhao, Wenzhe and Yang, Qinyu and Chen, Qi and Lin, Qin and Li, Chuyue and Gao, Tao and Shan, Yuhao and Guo, Song and Lu, Qinglin}, title = {VisionCreator: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4140-4149} }
PEdit: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Sooyeon and Park, Jaeil and Cho, Sung-Bae}, title = {PEdit: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4800-4809} }
QDM: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Donglin and Vicol, Paul and Qi, Xiaojuan and Liao, Renjie and Zhang, Xiaofan}, title = {QDM: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5044-5053} }
Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Zhihua and Chang, Haolin and Miao, Guohua and Chen, Jianing}, title = {Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3428-3437} }
Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Wanying and Chen, Zhuo and Lu, Jianzhi and Ma, Chenxi and Tan, Weimin and Yan, Bo}, title = {Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2721-2730} }
Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Aota_2026_CVPR, author = {Aota, Toshimichi and Hashimoto, Akinori and Sekizuka, Naoto and Okatani, Takayuki}, title = {Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6932-6942} }
SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification-
[pdf]
[bibtex]@InProceedings{Hsieh_2026_CVPR, author = {Hsieh, Jun Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching}, title = {SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6839-6848} }
PSIM: Perceptual Similarity Index Measure-
[pdf]
[supp]
[bibtex]@InProceedings{Eimon_2026_CVPR, author = {Eimon, Md Eimran Hossain and Kalva, Hari}, title = {PSIM: Perceptual Similarity Index Measure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8564-8574} }
StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yifei and Li, Zhenkai and Qian, Tianwen and Zheng, Huanran and Wang, Zheng and Fu, Yuqian and Wang, Xiaoling}, title = {StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9422-9432} }
HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion-
[pdf]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Aihua and Yang, Jun and Liu, Yong-Jin and He, Ying}, title = {HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {149-159} }
OffNadirLoc: Benchmark and Framework for Challenging UAV-to-Satellite Geo-Localization under Large Off-Nadir Views-
[pdf]
[bibtex]@InProceedings{Qiao_2026_CVPR, author = {Qiao, Qian and Liu, Wenye and Liu, Ting and Shu, Jiuhe and Wang, Peng}, title = {OffNadirLoc: Benchmark and Framework for Challenging UAV-to-Satellite Geo-Localization under Large Off-Nadir Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6394-6403} }
PLCReg: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yedi and Huang, Wenhui and Zheng, Yuanjie}, title = {PLCReg: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5535-5544} }
PhySe-RPO: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zining and Xue, Cheng and Liu, Chunhui and Xu, Bin and Chen, Ming and Hu, Xiaowei}, title = {PhySe-RPO: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5347-5356} }
DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yani and Wu, Dongming and Shi, Hao and Liu, Yingfei and Wang, Tiancai and Dong, Xingping}, title = {DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3103-3113} }
Do Audio-Visual Large Language Models Really See and Hear?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Selvakumar_2026_CVPR, author = {Selvakumar, Ramaneswaran and Jayakumar, Kaousheik and Sakshi, S and Ghosh, Sreyan and Gao, Ruohan and Manocha, Dinesh}, title = {Do Audio-Visual Large Language Models Really See and Hear?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5892-5902} }
Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models-
[pdf]
[supp]
[bibtex]@InProceedings{El-Ghoussani_2026_CVPR, author = {El-Ghoussani, Amir and H{\"o}lle, Marc and Carneiro, Gustavo and Belagiannis, Vasileios}, title = {Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4810-4820} }
Beyond Pixel Loss: Video-INRs Prefer Perceptual Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Junqi and Cong, Wuyang and Lu, Ming and Xu, Bowei and Ma, Zhan}, title = {Beyond Pixel Loss: Video-INRs Prefer Perceptual Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4843-4854} }
KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Dewi_2026_CVPR, author = {Dewi, Christine and Thiruvady, Dhananjay R and Zaidi, Nayyar}, title = {KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8766-8775} }
When Agents Steer Human Perception: How AI-Selected Images Can Covertly Alter Disagreements-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Gao, Yulang and Zou, Jiachen and Wei, Chen and Liu, Quanying}, title = {When Agents Steer Human Perception: How AI-Selected Images Can Covertly Alter Disagreements}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8111-8120} }
Shape and Texture Recognition in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Eppel_2026_CVPR, author = {Eppel, Sagi and Bismut, Mor and Strugatski, Alona}, title = {Shape and Texture Recognition in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1839-1849} }
StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ben_Mabrouk_2026_CVPR, author = {Ben Mabrouk, Souheib and Deschaud, Jean-Emmanuel and Coupet{\'e}, Eva and Derbanne, Thomas and Rahmouni, Nicolas}, title = {StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8481-8491} }
Depth Adaptive Efficient Visual Autoregressive Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chunliang and Cao, Tianze and Zhao, Sanyuan}, title = {Depth Adaptive Efficient Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4213-4223} }
TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Honghui and Fang, Chuangjie and Meng, Yiqun and Jiang, Jiawei and Chan, Sixian and Zhang, Shiqing and Zheng, Jianwei}, title = {TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1670-1679} }
Mitigating Vision-Text Order Bias in Vision-Language Model-
[pdf]
[bibtex]@InProceedings{Gan_2026_CVPR, author = {Gan, Weilin and Song, Yifan and Yu, Zhuocheng and Li, Sujian}, title = {Mitigating Vision-Text Order Bias in Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9664-9673} }
An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Pengzhou and Tang, Qiling and Chai, XinYu and Liu, Rong and Li, Zhi and Liu, Liman}, title = {An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3241-3251} }
M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Haolong and Tan, Kaijun and Shen, Yeqing and Huang, Xin and Wang, Jia and Ge, Zheng and Zhang, Xiangyu and Li, Si and Jiang, Daxin}, title = {M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2731-2741} }
Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuyang and Zeng, Linqian and Zhou, Yijin and Li, Hengjie and Zhai, Jidong}, title = {Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4486-4494} }
Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Wenbing and Liang, Jianing and Cheng, Linjie and Pan, Yurui and Chen, Zhuhao and Yan, Qingwang and Cheng, Yudong and Zhang, Jianghui and Chi, Mingmin and Peng, Bo}, title = {Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2060-2068} }
AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yulu and Cheng, Jiujun and Wang, Haowen and Suo, Dengyang and Ren, Pei and Mao, Qichao and Gao, Shangce and Huang, Yakun}, title = {AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3082-3091} }
PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiangyu and Wang, Chen and Liu, Yumao and He, Dengbo and Zhang, Jiahao and Ma, Ke}, title = {PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1010-1018} }
Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yong and Zhang, Weiyu and Dai, Ling and Yang, Jian and Yin, Dacheng and Li, Sirun and Lyu, Jing and Rao, Fengyun and Zhang, Fan}, title = {Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6302-6311} }
OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhishan and Wei, Siyuan and Wang, Zengran and Wang, Chunjie and Yan, Xiaosheng and Liu, Xiao}, title = {OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {233-242} }
Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Aoqiang and Hu, Min and Xing, Yan and Tang, Yiming}, title = {Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6921-6931} }
Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiangyue and Wang, Xiaoyang and Yao, Siyue and Sun, Mingjie and Wu, Yupei}, title = {Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4345-4354} }
ReConText3D: Replay-based Continual Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2026_CVPR, author = {Khan, Muhammad Ahmed Ullah and Bin Amir, Muhammad Haris and Stricker, Didier and Afzal, Muhammad Zeshan}, title = {ReConText3D: Replay-based Continual Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7893-7902} }
RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jingqi and Lu, Jingxi and Li, Chenghao and Sarkar, Sreetama and Kundu, Souvik and Beerel, Peter A.}, title = {RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2783-2792} }
Tiny Inference-Time Scaling with Latent Verifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bucciarelli_2026_CVPR, author = {Bucciarelli, Davide and Turri, Evelyn and Baraldi, Lorenzo and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Tiny Inference-Time Scaling with Latent Verifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2873-2882} }
TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Yao, Chao and Liu, Meiqin and Xiao, Jimin}, title = {TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7472-7482} }
ControlPose: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Zhongjing and Chen, Xiao and Nie, Zhiwei and Chen, Yuxuan and Liu, Chang and Ji, Xiangyang and Chen, Jie}, title = {ControlPose: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3851-3860} }
Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Junlong and Wen, Zichen and Yang, Boxue and Yang, Yantai and Liu, Xuyang and Liao, Chenfei and Chen, Zhaorun and Wang, Shaobo and Zhang, Linfeng}, title = {Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9131-9142} }
P^2CS: Parallel Point Cloud Pre-Training with Semantic Consistency-
[pdf]
[bibtex]@InProceedings{Diao_2026_CVPR, author = {Diao, Linshuang and Song, Sensen and Jia, Yuan and Qian, Yurong and Ren, Dayong}, title = {P{\textasciicircum}2CS: Parallel Point Cloud Pre-Training with Semantic Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5117-5126} }
PHYLOMAN: Generative Behavior Control via Fusing LLM Planning and Physics-based Control-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jusheng and Tang, Jinzhou and Liu, Sidi and Wang, Jian and Wang, Keze}, title = {PHYLOMAN: Generative Behavior Control via Fusing LLM Planning and Physics-based Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3585-3597} }
Super Sparse DETR: YOLO-Competitive Convergence and Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Hebao}, title = {Super Sparse DETR: YOLO-Competitive Convergence and Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6677-6684} }
THEval. Evaluation Framework for Talking Head Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quignon_2026_CVPR, author = {Quignon, Nabyl and Chopin, Baptiste and Wang, Yaohui and Dantcheva, Antitza}, title = {THEval. Evaluation Framework for Talking Head Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1943-1953} }
Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Jungwon and Kim, Eunwoo}, title = {Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9368-9377} }
HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Hui and Ke, Xiao and Zeng, Zhihong and Xu, Huangbiao and Wu, Huanqi}, title = {HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8337-8346} }
OmniHead: A Unified Model for Dynamic Nonverbal Facial Behaviors-
[pdf]
[supp]
[bibtex]@InProceedings{Vuillecard_2026_CVPR, author = {Vuillecard, Pierre and Odobez, Jean-Marc}, title = {OmniHead: A Unified Model for Dynamic Nonverbal Facial Behaviors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3553-3564} }
Online Interpretable Matrix Decomposition for Large-Scale Streaming Data-
[pdf]
[supp]
[bibtex]@InProceedings{Abdelgawad_2026_CVPR, author = {Abdelgawad, Muhammad A. A. and Eldaly, Abdelrahman B. M. and Xinmin, Meng and Jing, Peng and Sanka, Abdurrashid Ibrahim and Cheung, Ray C. C. and Yan, Hong}, title = {Online Interpretable Matrix Decomposition for Large-Scale Streaming Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7030-7039} }
ROSE: Retrieval-Oriented Segmentation Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Song and Jie, Guangquan and Ding, Henghui and Jiang, Yu-Gang}, title = {ROSE: Retrieval-Oriented Segmentation Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7398-7407} }
How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Xiang and Hu, Jinfan and You, Zhiyuan and Yan, Kainan and Tang, Yu and Dong, Chao and Gu, Jinjin}, title = {How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4909-4919} }
VoxFace: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Junwen and Li, Chuanyue and Zhang, Peng}, title = {VoxFace: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3543-3552} }
Jailbreaking Frontier Foundation Models Through Intention Deception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinhe and Sycara, Katia and Xie, Yaqi}, title = {Jailbreaking Frontier Foundation Models Through Intention Deception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {666-674} }
PTF-CT: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View CT-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Borui and Qin, Guanyi and Li, Chuanpu and Jin, Yueming}, title = {PTF-CT: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View CT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5214-5223} }
Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes-
[pdf]
[supp]
[bibtex]@InProceedings{Ran_2026_CVPR, author = {Ran, Weihang and Zhu, Qingtian and Cao, Mingdeng and Yuan, Wei and Echizen, Isao and Zheng, Yinqiang}, title = {Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1597-1607} }
Rethinking Compact (<1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions-
[pdf]
[supp]
[bibtex]@InProceedings{Kyrkou_2026_CVPR, author = {Kyrkou, Christos}, title = {Rethinking Compact (\ensuremath{<}1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2710-2720} }
MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, JaeHyuck and Kim, Minjun and Hong, Je Hyeong}, title = {MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8524-8533} }
Two-Stage 3D Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jie and Xin, Yu and Li, Guoqing}, title = {Two-Stage 3D Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5245-5254} }
Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sen_2026_CVPR, author = {Sen, Souptik and Younis, Raneen and Ahmadi, Zahra}, title = {Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6080-6089} }
GRAFT: Graph-Based Affordance Transfer via Part Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Mengying and Mishra, Utkarsh and Mandlekar, Ajay and Xu, Danfei}, title = {GRAFT: Graph-Based Affordance Transfer via Part Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8746-8755} }
Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Nagy_2026_CVPR, author = {Nagy, Rajmund and Voss, Hendric and Hoang-Minh, Thanh and Tsakov, Mihail and Nikolov, Teodor and Zhang, Zeyi and Ao, Tenglong and Yang, Sicheng and Huang, Shaoli and Cheng, Yongkang and Mughal, M. Hamza and Dabral, Rishabh and Chhatre, Kiran and Theobalt, Christian and Liu, Libin and Kopp, Stefan and McDonnell, Rachel and Neff, Michael and Kucherenko, Taras and Yoon, Youngwoo and Henter, Gustav Eje}, title = {Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2152-2164} }
HM-Talker: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shiyu and Jiang, Kui and Jiang, Junjun and Liu, Xianming and Feng, Xiaocheng and Ma, Fei and Yao, Hongxun and Tian, Qi}, title = {HM-Talker: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3729-3738} }
M^3D-BFS: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis-
[pdf]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Rui and Zhang, Xiaotong and Li, Jiaxing and Li, Yueying and Wei, Jiayin and Kong, Youyong}, title = {M{\textasciicircum}3D-BFS: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5419-5429} }
Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Hongbo and Bozorgpour, Afshin and Merhof, Dorit and Zhang, Minjia}, title = {Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5579-5588} }
POMA-3D: The Point Map Way to 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Ye and Luo, Weixun and Huang, Ranran and Jing, Junpeng and Mikolajczyk, Krystian}, title = {POMA-3D: The Point Map Way to 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7282-7292} }
HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yueqian and Zhang, Jingyang and Wang, Qinsi and Ye, Hancheng and Fu, Yuzhe and Liu, Yudong and Li, Hai Helen and Chen, Yiran}, title = {HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5968-5977} }
BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ziyi and Zhou, Jinzhao and Jiang, Xiaowei and Cao, Beining and Ma, Wenhao and Shen, Yang and Li, Ren and Wang, Yu-Kai and Lin, Chin-teng}, title = {BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7050-7059} }
FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Xinwan and Li, Bowen and Luo, Jiajun and Li, Ye and Wang, Zhi}, title = {FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4097-4107} }
Controllable Radar Simulation with Waveform Parameter Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Weiqing and Huang, Hao and Zhong, Chonghao and Lin, Yujie and Wang, Nan and Chen, Xiaoxue and Chen, Zhaoxi and Zhang, Saining and Yang, Shuocheng and Merriaux, Pierre and Lei, Lei and Zhao, Hao}, title = {Controllable Radar Simulation with Waveform Parameter Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6424--6434} }
Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Chao and Zhang, Junbo and Zhu, Chuanbo and Huang, Mingjun and Du, Bo}, title = {Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7272--7281} }
Frequency-Modulated Visual Restoration for Matryoshka Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Qingtao and Dou, Zhihao and Li, Shuo}, title = {Frequency-Modulated Visual Restoration for {Matryoshka} Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9726--9737} }
Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junjie and Liu, Zezheng and Liu, Runxiang and Fang, Yuming and Zuo, Yifan and Yan, Jiebin}, title = {Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7872--7882} }
DiFlowDubber: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Ngoc-Son and Tran, Thanh V. T. and Choi, Jeongsoo and Huynh-Nguyen, Hieu-Nghia and Hy, Truong-Son and Nguyen, Van}, title = {{DiFlowDubber}: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5838--5848} }
Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xuanming and An, Baoyi and Nie, Dingyu and Ren, Haoyu and Zou, Zhengwei and Yang, Yizhe and Shen, Jialie and Jin, Zhiwen and Qian, Xueming and Yang, Zhongyu and Zhao, Guoshuai}, title = {Bridge Your Fields: {MeteoNet} for Efficient Non-Uniform Meteorological Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1608--1617} }
SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhuohang and Yuan, Xu and Qu, Haohao and Lin, Shanru and Liu, Kanglong and Fan, Wenqi and Qing, Li}, title = {{SuperGlasses}: Benchmarking Vision Language Models as Intelligent Agents for {AI} Smart Glasses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2165--2175} }
MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junming and Ji, Yifei and Han, Yongxuan and Zheng, Zhenzhe}, title = {{MipKV}: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2926--2936} }
PrismNet: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ruichen}, title = {{PrismNet}: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4866--4876} }
Catalyst: Out-of-Distribution Detection via Elastic Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hassan_2026_CVPR, author = {Hassan, Abid and Ngo, Tuan and Shafiq, Saad and Medvidovic, Nenad}, title = {Catalyst: Out-of-Distribution Detection via Elastic Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1618--1628} }
ARGS: Auto-Regressive Gaussian Splatting via Parallel Progressive Next-Scale Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ruan_2026_CVPR, author = {Ruan, Quanyuan and Shi, Kewei and Lei, Jiabao and Gao, Xifeng and Han, Xiaoguang}, title = {{ARGS}: Auto-Regressive {Gaussian} Splatting via Parallel Progressive Next-Scale Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8439--8448} }
ReCliFF: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical MLLMs-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yuncheng and Feng, Chun-Mei and Sun, Rui and Zhang, Le}, title = {{ReCliFF}: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical {MLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5275--5284} }
Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jajal_2026_CVPR, author = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K. and Davis, James C. and Lu, Yung-Hsiang}, title = {Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4118--4128} }
VSI: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Jianxiang and Hong, Meisheng and Li, Jungang and Guo, Weiyu and Hu, Xuming and Xiong, Hui}, title = {{VSI}: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9003--9012} }
MART: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Changhui and Nagarajan, Bhalaji and Marques, Ricardo and Radeva, Petia}, title = {{MART}: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7018--7029} }
How2Sign-Synth3D: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking-
[pdf]
[bibtex]@InProceedings{Tempfli_2026_CVPR, author = {Tempfli, Levente and Huber, Stephan and Koller, Oscar and Duarte, Amanda}, title = {{How2Sign-Synth3D}: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3501--3509} }
Multimodal Large Language Models as Image Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kisel_2026_CVPR, author = {Kisel, Nikita and Volkov, Illia and Janouskova, Klara and Matas, Jiri}, title = {Multimodal Large Language Models as Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1711--1720} }
Reasoning for Mobile User Experience with Multimodal LLMs: Task, Benchmark, and Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Ruichao and Fang, Zhou and Guo, Teng and Yang, Hao and Li, Yaping and Peng, Shaohua and Huang, Maji and Lin, Xiaoyu and Liu, Shuoyang and Li, Xuepeng and Zhang, Yuyu and Rao, Hai}, title = {Reasoning for Mobile User Experience with Multimodal {LLMs}: Task, Benchmark, and Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8983--8992} }
PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Jie and Wu, JunXiang and An, Nan and Zhang, Zhen and Xiang, Shuiying and Zhang, Mingjin and Li, Yunsong and Gao, Yu'e}, title = {{PSLIF}: A Primary-Supplementary {LIF} Neuron for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2367--2376} }
Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wulff_2026_CVPR, author = {Wulff, Theodor and Tavella, Federico and Maharjan, Rahul Singh and Adikari, Manith and Cangelosi, Angelo}, title = {Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9269--9281} }
Learning Predictive Visuomotor Coordination-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Wenqi and Lai, Bolin and Cao, Xu and Liu, Miao and Xu, Danfei and Rehg, James M.}, title = {Learning Predictive Visuomotor Coordination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3609--3619} }
AdaMeta: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xingyu and Ma, Yidan and Qu, Hanzhang and Cao, Jianfu}, title = {{AdaMeta}: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7552--7561} }
Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yichun and Hu, Zixuan and Duan, Ling-Yu}, title = {Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6849--6858} }
Advancing Open-Set Detection and Segmentation via Disentangled Representations-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haokang and Guan, Yuchen and Cheng, Runxi and Yang, Yujiu}, title = {Advancing Open-Set Detection and Segmentation via Disentangled Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6622--6632} }
SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Fudong and Liu, Dingning and Wang, Hanshi and Zhang, Yiwei and Gao, Jin and Hu, Weiming and Zhang, Zhipeng}, title = {{SurfaceGS}: Dynamic Surface {Gaussian} Splatting for Urban Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {900--909} }
Generative Visual Chain-of-Thought for Image Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Zijin and Hang, Tiankai and Cheng, Yiji and Zhang, Shiyi and He, Runze and Xu, Yu and Wang, Chunyu and Li, Bing and Chang, Zheng and Liang, Kongming and Lu, Qinglin and Ma, Zhanyu}, title = {Generative Visual Chain-of-Thought for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4657--4667} }
CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tung-I and Wang, Lingdong and Maji, Subhransu and Sitaraman, Ramesh K.}, title = {{CATRF}: Codec-Adaptive {TriPlane} Radiance Fields for Volumetric Content Delivery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {457--467} }
Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Ji Woong and Wang, Ke and Fu, Zipeng and Chen, Sirui and Zhao, Cong and Lai, Jeff and Finn, Chelsea}, title = {{Ego-Pi}: {VLA} Fine-Tuning for Ego-Centric Human and Robot Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1515--1524} }
SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tanaka_2026_CVPR, author = {Tanaka, Shohei and Hashimoto, Atsushi and Ushiku, Yoshitaka}, title = {{SciPostLayoutTree}: A Dataset for Structural Analysis of Scientific Posters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2753--2762} }
Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the MME-CoF Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Ziyu and Chen, Xinyan and Zhang, Renrui and An, Ruichuan and Qi, Yu and Jiang, Dongzhi and Li, Xiangtai and Zhang, Manyuan and Li, Hongsheng and Heng, Pheng-Ann}, title = {Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the {MME-CoF} Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9175--9184} }
Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Feiyang and Yuan, Haoqi and Lu, Zongqing}, title = {Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3384--3393} }
Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ziwei and Tan, Dayu and Peng, Xin and Zhong, Weimin}, title = {Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for {3D} Brain Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5158--5167} }
Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs-
[pdf]
[supp]
[bibtex]@InProceedings{Gandhi_2026_CVPR, author = {Gandhi, Mona and Joseph, K. J. and Parthasarathy, Srinivasan and Nag, Sayan}, title = {Through the {PRISM}: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1786--1796} }
Learning a Particle Dynamics Model with Real-World Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Chanho and Sumukh, Suhas V. and Fuxin, Li}, title = {Learning a Particle Dynamics Model with Real-World Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {498--507} }
Guided Lensless Polarization Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kraicer_2026_CVPR, author = {Kraicer, Noa and Yosef, Erez and Giryes, Raja}, title = {Guided Lensless Polarization Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1252--1262} }
FVLF: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation-
[pdf]
[bibtex]@InProceedings{Rao_2026_CVPR, author = {Rao, Zhi and Zhou, Yucheng and Zhou, Benjia and Huang, Yiqing and Escalera, Sergio and Wan, Jun}, title = {{FVLF}: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9237--9247} }
DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Piland_2026_CVPR, author = {Piland, Jacob and Sweet, Christopher and Czajka, Adam}, title = {{DiffGradCAM}: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1201--1210} }
dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yingzi and Cao, Yulong and Ding, Wenhao and Zhang, Shuibai and Wang, Yan and Ivanovic, Boris and Jiang, Ming and Pavone, Marco and Xiao, Chaowei}, title = {{dVLM-AD}: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1050--1061} }
Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2026_CVPR, author = {Ho, Sy-Tuyen and Hao, Koh Jun and Nguyen, Ngoc-Bao and Binder, Alexander and Cheung, Ngai-Man}, title = {Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8070--8079} }
UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zeyang and Wang, Le and Zhou, Sanping and Wu, Yuxuan and Sun, Xiaolong and Hua, Gang and Li, Haoxiang}, title = {{UniLayDiff}: A Unified Diffusion Transformer for Content-Aware Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4668--4677} }
GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xiaolei and Fang, Chuangjie and Wu, Jie and Yang, Jingyi and Lin, Boyi and Zheng, Jianwei}, title = {{GeoFusion-CAD}: Structure-Aware Diffusion with Geometric State Space for Parametric {3D} Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {243--252} }
Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Pu and Li, Huafeng and Zhang, Yafei and Liu, Yu and Wang, Wen}, title = {Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1304--1313} }
Layer Embedding Deep Fusion Graph Neural Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Taihua and Tian, Genhao and Fan, Jicong and Yang, Xibei and Zhang, Qinghua and Cui, Yun}, title = {Layer Embedding Deep Fusion Graph Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7091--7100} }
CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sammani_2026_CVPR, author = {Sammani, Fawaz and Fischer, Jonas and Deligiannis, Nikos}, title = {{CLIP}-Free, Label Free, Unsupervised Concept Bottleneck Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3262--3272} }
From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Fei and Liu, Yu and Tang, Luyang and Sun, Mingchao and Ge, Zengye and Bu, Rui and Jin, Yuchao and Zhao, Haisen and Sun, He and Li, Yangyan and Xu, Mu and Chen, Wenzheng and Chen, Baoquan}, title = {From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {391--402} }
Myopia Rectification: KV Cache Pruning for MLLMs Via Dynamic Attention Subsidy and Token Reclamation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Jiedong and Lu, Lu and Dai, Ming and Chen, Jian and Liu, Qiang and Hu, Haoji}, title = {Myopia Rectification: {KV} Cache Pruning for {MLLMs} Via Dynamic Attention Subsidy and Token Reclamation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9023--9033} }
From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yukang and Wu, Wenxiao and Sun, Jianwen and Li, Chuanhao and Zhang, Fanrui and Li, Zizhen and Ai, Jiaxin and Zhou, Sizhuo and Chang, Yifan and Gao, Changxin and Zhang, Shenglin and Zhang, Kaipeng}, title = {From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2220--2229} }
FraQAT: Quantization Aware Training with Fractional Bits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morreale_2026_CVPR, author = {Morreale, Luca and Gil C P Ramos, Alberto and Chadwick, Malcolm and Noroozi, Mehdi and Chavhan, Ruchika and Mehrotra, Abhinav}, title = {{FraQAT}: Quantization Aware Training with Fractional Bits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8514--8523} }
M^3A Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Hu, Yuxuan and Geng, Haoran and Chen, Xiangyu and Zhou, Chuhao and Cui, Ziteng and Yang, Jianfei}, title = {{M$^3$A} Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3070--3081} }
A-SelecT: Automatic Timestep Selection for Diffusion Transformer Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Changyu and Liang, James Chenhao and Yang, Wenhao and Cui, Yiming and Yang, Jinghao and Wang, Tianyang and Wang, Qifan and Liu, Dongfang and Han, Cheng}, title = {{A-SelecT}: Automatic Timestep Selection for Diffusion Transformer Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6943--6954} }
VR-CLIP: Visual Refinement of CLIP for Zero-Shot Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haitao and Li, Xu and Cao, Yuanyang and Zhang, Ying and Wang, Jianji}, title = {{VR-CLIP}: Visual Refinement of {CLIP} for Zero-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6809--6818} }
Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zidong and Huang, Yihao and Guo, Qing and Li, Tianlin and Li, Anran and Wang, Kailong and Dong, Jin Song and Pu, Geguang}, title = {Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8049--8058} }
EvoPrompt-ReID: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yuanlin and Wang, Zhenchuan and Chen, Jun and He, Yingying and Wang, Jiabao and Wang, Weiwen and Xu, Kun and Zhou, Zijin and Wang, Xiaoxiao and Chen, Mingju and Liu, Tingting and Pan, Zhisong}, title = {{EvoPrompt-ReID}: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6758--6767} }
Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xian-Hua}, title = {Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {1294--1303} }
Beyond Syntax: Action Semantics Learning for App Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Bohan and Luo, Dezhao and Liu, Jianheng and Chen, Jingxuan and Gong, Shaogang and Hao, Jianye and Wang, Jun and Shao, Kun}, title = {Beyond Syntax: Action Semantics Learning for App Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9444--9454} }
Weakly-Supervised Referring Video Object Segmentation Through Text Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Miaojing and Huang, Jun and Yue, Zijie and Wang, Hanli}, title = {Weakly-Supervised Referring Video Object Segmentation Through Text Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7461--7471} }
WGS: Watertight Geometry Standardization for Scalable 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Dehao and Jia, Tanghui and Zhang, Kaiyi and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Zhang, Runze and Zhu, Lingting and Yuan, Li and Wang, Xin and Quan, Long}, title = {{WGS}: Watertight Geometry Standardization for Scalable {3D} Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {569--578} }
Don't Let the Information Slip Away-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Taozhe and Wang, Guansu and Yu, Bo and Liu, Yiming and Sun, Wei}, title = {Don't Let the Information Slip Away}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8504--8513} }
Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tezuka_2026_CVPR, author = {Tezuka, Ryuki and Nakatani, Chihiro and Ukita, Norimichi}, title = {{Group-DINOmics}: Incorporating People Dynamics into {DINO} for Self-supervised Group Activity Feature Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8215--8225} }
AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Cencen and Zhang, Dongyang and Yin, Wen and Wang, Jielei and Li, Tianyu and Guo, Ji and Jiang, Wenbo and Wang, Guoqing and Lu, Guoming}, title = {{AlignVAR}: Towards Globally Consistent Visual Autoregression for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5054--5063} }
ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Boran and Wang, Xinming and Chen, Yi and Li, Xiang and Xu, Jian and Yuan, Jing and Liu, Cheng-Lin}, title = {{ChartAgent}: A Chart Understanding Framework with Tool Integrated Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2773--2782} }
Dual Strategies for Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phuong_2026_CVPR, author = {Phuong, Nam Nguyen and Minh, Duc Nguyen The and Le Nguyen, Phi and Abbasnejad, Ehsan and Hoai, Minh}, title = {Dual Strategies for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {2483--2492} }
Less is More: Token-Efficient Video-QA via Adaptive Frame-Pruning and Semantic Graph Integration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shaoguang and Guo, Weiyu and Chen, Ziyang and Xu, Yijie and Hu, Xuming and Xiong, Hui}, title = {Less is More: Token-Efficient {Video-QA} via Adaptive Frame-Pruning and Semantic Graph Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9856--9866} }
HiVid-Narrator: Hierarchical Video Narrative Generation with Scene-Primed ASR-anchored Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Haoxuan and Li, Mengyan and Zheng, Junjun},
  title     = {{HiVid-Narrator}: Hierarchical Video Narrative Generation with Scene-Primed {ASR-anchored} Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8195--8204},
}
Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Xiao and Zhang, Yue and Bansal, Mohit and Liu, Xiaoming},
  title     = {{Deepfake-Agent}: Aggregating Semantic Forgery Clues for Generalizable Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4898--4908},
}
CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haitian and Fang, Juan and Zhu, Yiren and Zhao, Xudong and Guo, Yufei and Zhang, Xiaohan and Hu, Xiaoxing and Yang, Xue and Ming, Qi},
  title     = {{CrossWeaver}: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6361--6370},
}
GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aboukhadra_2026_CVPR,
  author    = {Aboukhadra, Ahmed Tawfik and Rogge, Marcel and Robertini, Nadia and Arafa, Abdalla and Malik, Jameel and Elhayek, Ahmed and Stricker, Didier},
  title     = {{GHOST}: Fast Category-Agnostic Hand-Object Interaction Reconstruction from {RGB} Videos Using {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3394--3404},
}
CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jindal_2026_CVPR,
  author    = {Jindal, Akshit and Anand, Saket and Arora, Chetan and Goyal, Vikram},
  title     = {{CLIP-Inspector}: Model-Level Backdoor Detection for Prompt-Tuned {CLIP} via {OOD} Trigger Inversion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {716--725},
}
Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Jielun and Wang, Yabin and Li, Yaqi and Kong, Long and Hong, Xiaopeng},
  title     = {Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6655--6666},
}
RecycleLoRA: Rank-Revealing QR-Based Dual-LoRA Subspace Adaptation for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Chanseul and Yun, Seokju and Jun, Jaesung and Moon, Seungjae and Ro, Youngmin},
  title     = {{RecycleLoRA}: Rank-Revealing {QR-Based} {Dual-LoRA} Subspace Adaptation for Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7503--7513},
}
Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yishu and Wu, Shushan and Zhang, Zhenzhong and Li, Didong and Yao, Huaxiu and Li, Yun and Carmichael, Iain and Hoadley, Katherine A. and Zhu, Hongtu and Wu, Di and Zhang, Daiwei},
  title     = {Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5201--5213},
}
The Mechanics of CNN Filtering with Rectification-
[pdf]
[supp]
[bibtex]@InProceedings{Frija-Altarac_2026_CVPR,
  author    = {Frija-Altarac, Liam and Toews, Matthew},
  title     = {The Mechanics of {CNN} Filtering with Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1690--1699},
}
PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haiyang and Zhang, Huiqin and Zhang, Yanduo and Ma, Jiayi and Jiang, Junjun and Zhou, Huabing},
  title     = {{PhyFusion}: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4993--5002},
}
AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging-
[pdf]
[bibtex]@InProceedings{Bazarvaani_2026_CVPR,
  author    = {Bazarvaani, Zuchi and Lee, Seung-Ho and Ahn, Jeongmin and Jeon, Donghyeon and Kang, Inho and Na, Seung-Hoon},
  title     = {{AlphaMerging}: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2700--2709},
}
Anticipatory Planning for Multimodal AI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yongyuan and Zhou, Shijie and Gu, Yu and Tan, Hao and Wu, Gang and Dernoncourt, Franck and Kil, Jihyung and Rossi, Ryan A. and Zhang, Ruiyi},
  title     = {Anticipatory Planning for Multimodal {AI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5925--5935},
}
Learning to Select, Learning to Judge: Active Preference Alignment for Mars Terrain Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, JunJie and Li, Miyu and Wang, Jiawei and Liu, Yu and Wang, Yumei},
  title     = {Learning to Select, Learning to Judge: Active Preference Alignment for {Mars} Terrain Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8887--8896},
}
PosterGen: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhilin and Zhang, Xiang and Wei, Jiaqi and Xu, Yiwei and You, Chenyu},
  title     = {{PosterGen}: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9813--9823},
}
Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiaxuan and Xu, Qianqian and Wen, Peisong and Dai, Siran and Liu, Yang and Huang, Qingming},
  title     = {Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7924--7934},
}
ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kwanyoung and Oh, Hyunwoo and Cha, SeungJu and Koh, Sungho and Kim, Dong-Jin},
  title     = {{ADAPT}: Attention Driven Adaptive Prompt Scheduling and {InTerpolating} Orthogonal Complements for Rare Concepts Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4562--4571},
}
Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Abid_2026_CVPR,
  author    = {Abid, Mian Muhammad Naeem and Timofte, Radu},
  title     = {Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7293--7303},
}
Re^2MoGen: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jiakun and Xiao, Ting and Cao, Shiqin and Li, Xinran and Wang, Zhe and Bai, Chenjia},
  title     = {{Re$^2$MoGen}: Open-Vocabulary Motion Generation via {LLM} Reasoning and Physics-Aware Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1378--1387},
}
Fast Generative DeOcclusion for Visual Geometry and Robotics-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jieneng and Zhang, Tiezheng and Xuan, Xiwei and He, Ju and Yin, Yifan and Shi, Haojun and Ye, Suyu and Li, Xinyi and Yuan, Ruisheng and Shu, Tianmin and Yuille, Alan},
  title     = {Fast Generative {DeOcclusion} for Visual Geometry and Robotics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1314--1324},
}
REBA: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Chengxi and Japar, Nurul and Lim, Chee Kau},
  title     = {{REBA}: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8280--8290},
}
Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Shuai and Ge, Yuxin and Zhang, Baoming and Du, Yuntao and Chen, MingCai and Wang, Chongjun and Feng, Lei},
  title     = {Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6602--6611},
}
ZeroDiff++: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Qin and Li, Qi and Liu, Limei and Yang, Junfeng and Peng, Han},
  title     = {{ZeroDiff++}: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6997--7007},
}
Scaling Pre-training to One Hundred Billion Data for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiao and Alabdulmohsin, Ibrahim and Salz, Daniel and Li, Zhe and Rong, Keran and Zhai, Xiaohua},
  title     = {Scaling Pre-training to One Hundred Billion Data for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6185--6196},
}
LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map-
[pdf]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wei and Wang, Xiaohan and Liu, Yuehu},
  title     = {{LP3}: {LLM-based} Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1462--1471},
}
Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Jiao, Qiang and Shi, Mengrui and Zhang, Qiang},
  title     = {Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7378--7387},
}
VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yu and Cheng, Chong and Yu, Sicheng and Guo, Xiaoyang and Wang, Hao},
  title     = {{VGGT4D}: Mining Motion Cues in Visual Geometry Transformers for {4D} Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {414--424},
}
V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bingda and Zhang, Yuhui and Wang, Xiaohan and Mao, Jiayuan and Schmidt, Ludwig and Yeung-Levy, Serena},
  title     = {{V-GRPO}: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3769--3778},
}
SocialMirror: Reconstructing 3D Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Qi and Cong, Peishan and Wang, Ziyi and Sun, Yujing and Sun, Qin and Zhu, Xinge and Ye, Mao and Yang, Ruigang and Ma, Yuexin},
  title     = {{SocialMirror}: Reconstructing {3D} Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3510--3520},
}
Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Kaiyuan and Xie, Shuangyu and Goldberg, Andrew and Goldberg, Ken},
  title     = {Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1452--1461},
}
Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Srinivas_2026_CVPR,
  author    = {Srinivas, Gurucharan and Niemeijer, Joshua and K{\"o}ster, Frank},
  title     = {Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7122--7131},
}
Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xiaowei and Liu, Zhide and Ma, Yuqing and Liu, Xianglong},
  title     = {Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9357--9367},
}
Exploiting the Source-Asymmetry Confidence Gap for Generalizable AI-Generated Image Detection-
[pdf]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Ziyang and Chen, Weiyan and Xiao, Yao and Cao, Zijie and Zhang, Dongyu and Wei, Pengxu},
  title     = {Exploiting the Source-Asymmetry Confidence Gap for Generalizable {AI-Generated} Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8715--8724},
}
Object Pose Transformer: Unifying Unseen Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Weihang and Garattoni, Lorenzo and Despinoy, Fabien and Navab, Nassir and Busam, Benjamin},
  title     = {Object Pose Transformer: Unifying Unseen Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {436--446},
}
Materialistic RIR: Material Conditioned Realistic RIR Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saad_2026_CVPR,
  author    = {Saad, Mahnoor Fatima and Majumder, Sagnik and Grauman, Kristen and Al-Halah, Ziad},
  title     = {Materialistic {RIR}: Material Conditioned Realistic {RIR} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5871--5881},
}
Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem-
[pdf]
[bibtex]@InProceedings{Su_2026_CVPR,
  author    = {Su, Dawei and Fang, Zhanhong and Luo, Junyi and Wang, Debing and Chen, Jinbiao and Zhang, Zizhen},
  title     = {Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6261--6270},
}
IntentEdit: Multi-Agent Reasoning for Intent-Driven Complex Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuxuan and Huang, Shijia and Wang, Liwei},
  title     = {{IntentEdit}: Multi-Agent Reasoning for Intent-Driven Complex Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8776--8785},
}
Value bounds and Convergence Analysis for Averages of LRP attributions-
[pdf]
[supp]
[bibtex]@InProceedings{Binder_2026_CVPR,
  author    = {Binder, Alexander and Takmil-Homayouni, Nastaran and Dogan, Urun},
  title     = {Value bounds and Convergence Analysis for Averages of {LRP} attributions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3343--3353},
}
Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junyang and Ni, Haomin and Lai, Hanjiang},
  title     = {Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {819--828},
}
DepthScopy: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Minghai and Zhang, Xiaoxian and Liu, Xiaoyue and Yang, Fan and Li, Lei},
  title     = {{DepthScopy}: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5265--5274},
}
GaussFiller: Unleashing VLM-Expert Guidance for 3D Scene Completion with 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Ping_2026_CVPR,
  author    = {Ping, Yuhan and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Pan, Jia and Wang, Wenping},
  title     = {{GaussFiller}: Unleashing {VLM-Expert} Guidance for {3D} Scene Completion with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7132--7142},
}
DGD: Density Gradient-guided Diffusion for Long-Tailed Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Deng, Yuanyuan and Zhou, Kun},
  title     = {{DGD}: Density Gradient-guided Diffusion for Long-Tailed Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7654--7664},
}
SFS-DETR: Spatial-Frequency Selection for UAV Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Dingding and Wang, Jiankang and Zhang, Longlong and Liu, Zhiheng and Wang, Xuan},
  title     = {{SFS-DETR}: Spatial-Frequency Selection for {UAV} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6582--6591},
}
UniShield: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian},
  title     = {{UniShield}: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8121--8132},
}
Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2026_CVPR,
  author    = {Nam, Sanghyeok and Kim, Byoungjun and Park, Daehyung and Kim, Tae-Kyun},
  title     = {Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3698--3708},
}
Another BRIXEL in the Wall: Towards Cheaper Dense Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lappe_2026_CVPR,
  author    = {Lappe, Alexander and Giese, Martin A.},
  title     = {Another {BRIXEL} in the Wall: Towards Cheaper Dense Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7605--7614},
}
Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jie and Zhao, Na},
  title     = {{Stream3D}: Streaming Zero-Shot {3D} Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {327--337},
}
Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noguchi_2026_CVPR,
  author    = {Noguchi, Chihiro and Yamamoto, Takaki},
  title     = {Pseudo-Expert Regularized Offline {RL} for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1096--1105},
}
Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing-
[pdf]
[bibtex]@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Hao and Liu, Liyang and Luo, Zhengxiong and Zong, Zhuofan and Li, Hongsheng},
  title     = {Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3893--3902},
}
Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media-
[pdf]
[supp]
[bibtex]@InProceedings{K.M_2026_CVPR,
  author    = {K.M, Megha Mariam and Balasubramanian, Vineeth N. and Jawahar, C. V.},
  title     = {Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2079--2088},
}
Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Powei and Abe, Jiro and Ogura, Kazumine},
  title     = {{Point2Gaussian}: {Point-Cloud-to-Gaussian} Conversion for Efficient {3D} Scene Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {109--118},
}
VIDEOP2R: Video Understanding from Perception to Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yifan and Wang, Yueying and Zhao, Rui and Parag, Toufiq and Chen, Zhimin and Liao, Zhenyu and Unnikrishnan, Jayakrishnan},
  title     = {{VIDEOP2R}: Video Understanding from Perception to Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8303--8313},
}
DA-CLIP: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jianqin and Wang, Peng and Huang, Junming and Zhou, Xue and Yu, Li},
  title     = {{DA-CLIP}: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6062--6071},
}
Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuexin and Wang, Xiaolei and Cheng, Guangliang and Bai, Huihui and Tillo, Tammam and Xiao, Jimin},
  title     = {Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8836--8845},
}
Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yujie and Qin, Haotong and Zhang, Zhang and Magno, Michele and Benini, Luca and Li, Yawei},
  title     = {{Q-MambaIR}: Accurate Quantized {Mamba} for Efficient Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2524--2533},
}
Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ro_2026_CVPR,
  author    = {Ro, Yusung and Choi, Jaehyun and Kim, Junmo},
  title     = {Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3252--3261},
}
Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs-
[pdf]
[bibtex]@InProceedings{chen_2026_CVPR,
  author    = {Chen, Rongqian and Andreyev, Allison and Xiu, Yanming and Chilukuri, Joshua and Sen, Shunav and Imani, Mahdi and Li, Bin and Gorlatova, Maria and Tan, Gang and Lan, Tian},
  title     = {Cognitive Attack Detection in Augmented Reality ({CADAR}): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {799--808},
}
WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lezhong and Kaya, Mehmet Onurcan and Bigdeli, Siavash Arjomand and Frisvad, Jeppe Revall},
  title     = {{WildRelight}: A Real-World Dataset and Benchmark for Single-Image Relighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2007--2016},
}
PrismPrune: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ziniu and Zhou, Shuheng and Liu, Mingqing and Deng, Hao and Zhu, Huijia},
  title     = {{PrismPrune}: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6174--6184},
}
From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Lysa and Liesaputra, Veronica and Szymanski, Lech and Cranefield, Stephen},
  title     = {From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5882--5891},
}
UMI-HOI: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yuankai and Li, Zhinan and Patsch, Constantin and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard},
  title     = {{UMI-HOI}: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5999--6008},
}
Context-Aware Semantic Segmentation via Stage-Wise Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Carreaud_2026_CVPR,
  author    = {Carreaud, Antoine and Naha, Elias and Chansel, Arthur and Lahellec, Nina and Skaloud, Jan and Gressin, Adrien},
  title     = {Context-Aware Semantic Segmentation via Stage-Wise Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2680--2690},
}
Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xincheng and Sun, Hanchi and Sun, Wenjun and Xue, Kejun and Zhou, Wangqiu and Zhang, Jianbo and Sun, Wei and Zhu, Dandan and Min, Xiongkuo and Jia, Jun and Fang, Zhijun},
  title     = {Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2230--2239},
}
Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yongqi and Li, Yuke and Huang, Heng and Li, Zhihui and Du, Bo and Wu, Yu},
  title     = {Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8019--8028},
}
Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yifei and Xu, Ning and Jin, Guoqing and Zhang, Shenyuan and Liu, An-An},
  title     = {Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5755--5764},
}
Paper2SysArch: Structure-Constrained System Architecture Generation from Scientific Papers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyi and Liu, Zhou and Zhang, Wentao},
  title     = {{Paper2SysArch}: Structure-Constrained System Architecture Generation from Scientific Papers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1996--2006},
}
OutSafe-Bench: A Benchmark for Multimodal Offensive Content Detection in Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yuping and Xie, Yuhan and Li, Yuanshuai and Yu, Yingchao and Lyu, Lingjuan and Jin, Yaochu},
  title     = {{OutSafe-Bench}: A Benchmark for Multimodal Offensive Content Detection in Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1965--1975},
}
ProGIC: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Hao and Liang, Chengbin and Guo, Wenqi and Qin, Zhijin and Han, Jungong},
  title     = {{ProGIC}: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2915--2925},
}
Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jingcheng and Xiong, Tianhu and Qian, Shengyi and Nahrstedt, Klara and Wu, Mingyuan},
  title     = {Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3322--3331},
}
ASTRA: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Tianze and Ning, Zijian and Zhao, Zonglin and Wang, Mingjia},
  title     = {{ASTRA}: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3820--3829},
}
In2CLR: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yunxuan and Liu, Bohao and Wu, Yanxia and Li, Rongsheng},
  title     = {{In2CLR}: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2398--2407},
}
Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Xiankang and Guo, Dongyan and Li, Hongji and Cui, Ying and Weng, Libo and Li, Ruibo and Zhang, Chi},
  title     = {Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {591--601},
}
FA-MoE: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yifan and Meng, Qingjie and Chen, Tao and Chen, Huiping},
  title     = {{FA-MoE}: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3988--3997},
}
PlanGS: Active 3D Gaussian Reconstruction with Real-Time Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Wenxiang and Chen, Anpei and Yu, Haoming and Shen, Yujun and Xu, Weiwei},
  title     = {{PlanGS}: Active {3D} {Gaussian} Reconstruction with Real-Time Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3156--3166},
}
Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Roh_2026_CVPR,
  author    = {Roh, Jimin and Kim, Dongkyu and Kang, Suk-Ju},
  title     = {Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6218--6227},
}
Once for All: An End-to-End Paradigm for VLM-Based Domain-Generalized Object Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peng and Yuan, Xiang and Li, Cong and Han, Junwei and Cheng, Gong},
  title     = {Once for All: An End-to-End Paradigm for {VLM}-Based Domain-Generalized Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6829--6838},
}
Image Classification Using CNN-QNN Hybrid Model with Optimized Correlated Features-
[pdf]
[bibtex]@InProceedings{Seong_2026_CVPR,
  author    = {Seong, Minseo and Kim, Youngwook},
  title     = {Image Classification Using {CNN-QNN} Hybrid Model with Optimized Correlated Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2473--2482},
}
Di3PO - Diptych Diffusion DPO for Targeted Improvements in Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reddy_2026_CVPR,
  author    = {Reddy, Sanjana and Malhi, Ishaan and Ma, Sally and Dutta, Praneet},
  title     = {{Di3PO} - Diptych Diffusion {DPO} for Targeted Improvements in Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8868--8876},
}
VRAG-DFD: Verifiable Retrieval-Augmentation for MLLM-based Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR,
  author    = {Han, Hui and Wang, Shunli and Zhao, Yandan and Yao, Taiping and Ding, Shouhong},
  title     = {{VRAG-DFD}: Verifiable Retrieval-Augmentation for {MLLM}-based Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9552--9562},
}
FlowC2S: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Margaryan_2026_CVPR,
  author    = {Margaryan, Hovhannes and Bammey, Quentin and Sandor, Christian},
  title     = {{FlowC2S}: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3861--3872},
}
Fast Kernel-Space Diffusion for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Hancong and Cao, Zihan and Deng, Liang-Jian and Li, Jingjing},
  title     = {Fast Kernel-Space Diffusion for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6291--6301},
}
Name That Part: 3D Part Segmentation and Naming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paul_2026_CVPR,
  author    = {Paul, Soumava and Kaushik, Prakhar and Vaidya, Ankit and Bhattad, Anand and Yuille, Alan},
  title     = {Name That Part: {3D} Part Segmentation and Naming},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1808--1817},
}
DebFilter: Eradicating Biases Stashed in Value-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seung Hyuk and Kim, Songkuk},
  title     = {{DebFilter}: Eradicating Biases Stashed in Value},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4790--4799},
}
Video Parallel Scaling: Aggregating Diverse Frame Subsets for VideoLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2026_CVPR,
  author    = {Chung, Hyungjin and Nam, Hyelin and Kim, Jiyeon and Go, Hyojun and Park, Byeongjun and Kim, Junho and Lee, Joonseok and Ha, Seongsu and Kim, Byung-Hoon},
  title     = {Video Parallel Scaling: Aggregating Diverse Frame Subsets for {VideoLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8972--8982},
}
FinChart-Multimodal: A Dataset for Context-Injected Financial Chart Understanding with Aligned OHLCV Time Series-
[pdf]
[bibtex]@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Devansh},
  title     = {{FinChart-Multimodal}: A Dataset for Context-Injected Financial Chart Understanding with Aligned {OHLCV} Time Series},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1934--1942},
}
MAE-XNT: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with X-Ray Nanotomography-
[pdf]
[supp]
[bibtex]@InProceedings{Laugros_2026_CVPR,
  author    = {Laugros, Alfred and Roig, Sebastien and Pacureanu, Alexandra},
  title     = {{MAE-XNT}: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with {X-Ray} Nanotomography},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5388--5398},
}
Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Harmanani_2026_CVPR,
  author    = {Harmanani, Mohamed and Long, Bining and Guo, Zhuoxin and Wilson, Paul F. R. and Sabour, Amirhossein and To, Minh Nguyen Nhat and Fichtinger, Gabor and Abolmaesumi, Purang and Mousavi, Parvin},
  title     = {Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5589--5598},
}
Intelligent Photo Retouching with Language Model-Based Artist Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haoyu and Tao, Keda and Wang, YiZao and Wang, Xinlei and Zhu, Lei and Gu, Jinjin},
  title     = {Intelligent Photo Retouching with Language Model-Based Artist Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1240--1251},
}
NaiLIA: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Amemiya_2026_CVPR,
  author    = {Amemiya, Kanon and Yashima, Daichi and Katsumata, Kei and Komatsu, Takumi and Korekata, Ryosuke and Otsuki, Seitaro and Sugiura, Komei},
  title     = {{NaiLIA}: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9034--9044},
}
CREM: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lihao and Yang, Biao and Wang, Yan and Li, Da and Cao, Jiangxia and Luo, Yuxiao and Chen, Xiang and Wu, Xiangyu and Yuan, Wei and Yang, Fan and Ding, Guiguang and Gao, Tingting and Zhou, Guorui},
  title     = {{CREM}: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5714--5724},
}
AITP: Traffic Accident Responsibility Allocation via Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zijin and Zhang, Songan},
  title     = {{AITP}: Traffic Accident Responsibility Allocation via Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9259--9268},
}
CheXmix: Unified Generative Pretraining for Vision Language Models in Medical Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashwin and Holland, Robbie and Barrett, Corey and Kim, Jangwon and Varma, Maya and Chen, Zhihong and Gao, Yunhe and Zaharchuk, Greg and Taghavi, Tara and Kenthapadi, Krishnaram and Chaudhari, Akshay},
  title     = {{CheXmix}: Unified Generative Pretraining for Vision Language Models in Medical Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9466--9476},
}
Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chaochen and Zuo, Meiyun and Xie, Lei},
  title     = {Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5495--5504},
}
AnatomiX, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest X-Ray Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hashmi_2026_CVPR,
  author    = {Hashmi, Anees Ur Rehman and Saeed, Numan and Lippert, Christoph},
  title     = {{AnatomiX}, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest {X-Ray} Interpretation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6009--6018},
}
A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaidong and Zhang, Jian and Xu, Rongtao and Sun, Yu and Wen, Youpeng and Xue, Shuoshuo and Guo, Xiaoyu and Guo, Minghao and Liufu, Weijia and Zihou, Liu and Ji, Kangyi and Li, Zihang and Chen, Ruiyi and Cao, Meng and Zhang, Jingming and Zhao, Shen and Chang, Xiaojun and Zheng, Feng and Laptev, Ivan and Liang, Xiaodan},
  title     = {{A1}: Adaptive Truncated Vision-Language-Action Model from Affordance to Action},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1503--1514},
}
Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Fu, Xiaomeng and Gao, Yizhao and Wang, Jiaxu and Wang, Xi and So, Hayden Kwok-Hay},
  title     = {Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4160--4169},
}
Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3273--3281},
}
Entropy-Based Visual Re-perception Inference for Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liufu_2026_CVPR,
  author    = {Liufu, Jia and Yan, Qiangyu and Kan, Zhehan and Yang, Wenming and Hu, Hailin and Chen, Xinghao and Jiang, Borui},
  title     = {Entropy-Based Visual Re-perception Inference for Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9770--9779},
}
MotionDuet: Dual-Conditioned 3D Human Motion Generation with Video-Regularized Text Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yi-Yang and Sun, Tengjiao and Fang, Pengcheng and Wang, Deng-Bao and Cai, Xiaohao and Zhang, Min-Ling and Kim, Hansung},
  title     = {{MotionDuet}: Dual-Conditioned {3D} Human Motion Generation with Video-Regularized Text Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3709--3718},
}
Is Prompt Selection Necessary for Task-Free Online Continual Learning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Seoyoung and Lee, Haemin and Lee, Hankook},
  title     = {Is Prompt Selection Necessary for Task-Free Online Continual Learning?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7883--7892},
}
3DFA: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Sizhe and Chi, Yankuan and Zhong, Shuhan and Chan, S.-H. Gary},
  title     = {{3DFA}: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {360--369},
}
Dyna-ViT: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Rubab_2026_CVPR,
  author    = {Rubab, Syeda Fiza and Ghaffar, Arslan Abdul and Gul, Malik Junaid Jami and Murtala, Sheriff and Lee, Ingyu and Choi, Gyu Sang},
  title     = {{Dyna-ViT}: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2844--2851},
}
Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hantao and Han, Ning and Zeng, Yawen and Zhu, Hegui and Chen, Hao},
  title     = {Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9045--9054},
}
ReMem: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ling and Chen, Zhe and Wu, Gaochang and Ding, Jinliang and Wang, Xiaojie and Ning, Zhaolong},
  title     = {{ReMem}: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7697--7705},
}
Phantasia: Context-Adaptive Backdoors in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Nam Duong and Le Nguyen, Phi},
  title     = {Phantasia: Context-Adaptive Backdoors in Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {695--704},
}
CLIPtone-GO: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment-
[pdf]
[supp]
[bibtex]@InProceedings{Merothiya_2026_CVPR,
  author    = {Merothiya, Satyam and Kamra, Chanda Grover and Mastan, Indra Deep},
  title     = {{CLIPtone-GO}: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8695--8704},
}
FedAR: Attribute-Guided Representation Learning for Heterogeneous Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengjie and Yang, Liu and Shen, Qi},
  title     = {{FedAR}: Attribute-Guided Representation Learning for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6987--6996},
}
Model Merging on Loss Landscapes: A Geometric Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juanwu and Bhaskar, Anand and Axelrod, Brian and Tolstaya, Ekaterina and Emrich, Tristan},
  title     = {Model Merging on Loss Landscapes: A Geometric Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7644--7653},
}
Rethinking Whole-Body CT Image Interpretation: An Abnormality-Centric Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ziheng and Dai, Lisong and Zhang, Ya and Xie, Weidi and Wang, Yanfeng},
  title     = {Rethinking Whole-Body {CT} Image Interpretation: An Abnormality-Centric Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5179--5189},
}
DR-DPO: Dual-Regularized DPO for Efficient Dataset Condensation-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Wang, Guansu and Ren, Pengju},
  title     = {{DR-DPO}: Dual-Regularized {DPO} for Efficient Dataset Condensation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2111--2120},
}
Efficient Document Parsing via Parallel Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Lei and Zhao, Ze and Li, Meng and Lun, Zhongwang and Yuan, Yi and Lu, Xingjing and Wei, Zheng and Bian, Jiang and Li, Zang},
  title     = {Efficient Document Parsing via Parallel Token Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2763--2772},
}
PaLMR: Towards Faithful Visual Reasoning via Multimodal Process Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yantao and Yan, Chenyang and Hui, Qiang and Zhao, Fang and Cheng, Kanzhi and Tan, Chao and Gao, Huanlin and Zhang, Jianbing and Wang, Kai and Dai, Xinyu and Lian, Shiguo},
  title     = {{PaLMR}: Towards Faithful Visual Reasoning via Multimodal Process Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6111--6121},
}
SAGA: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yongchao and Huang, Ziyue and Zhang, Jinqing and Cai, Wenrui and Liu, Qingjie},
  title     = {{SAGA}: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7779--7788},
}
VEGAS: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihu and Xu, Boxun and Xia, Yuxuan and Li, Peng},
  title     = {{VEGAS}: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9055--9064},
}
Texture-Guided Multiscale Cross-Modal Fusion for AI-Generated Image Quality Assessment-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Qinlin and Zhou, Mingliang and Liao, Xingran},
  title     = {Texture-Guided Multiscale Cross-Modal Fusion for {AI}-Generated Image Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2629--2638},
}
Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Sungwon and Hwang, Dongjun and Kim, Shiwon and Choe, Junsuk and Nang, Jongho},
  title     = {Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7634--7643},
}
NSGuard: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Lizhi and Feng, Jianguo and Li, Ziqiang and Li, Jun and Jiang, Weiwei and Fu, Zhangjie},
  title     = {{NSGuard}: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {675--684},
}
FineGrade: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yicong and Leung, Howard},
  title     = {{FineGrade}: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8258--8267},
}
TinySR: Shallow Diffusion Transformers for Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Linwei and Fan, Qingnan and Yu, Yuhang and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing},
  title     = {{TinySR}: Shallow Diffusion Transformers for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5075--5085},
}
4D E-SloMo: 4D Reconstruction for High Speed Scene using a Hybrid RGB-Event Multi-View System-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Bo and Dai, Jun and Chen, Yutian and Xu, Linning and Yu, Mulin and Wang, Yujin and Guo, Shi and Le, Xinyi and Xue, Tianfan},
  title     = {{4D} {E-SloMo}: {4D} Reconstruction for High Speed Scene using a Hybrid {RGB}-Event Multi-View System},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {43--53},
}
ReaAct: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yanzhao and Ding, Yi and Tang, Peijun and Yang, Haotian and Qi, Xianbiao and Wang, Jianan and Wang, Xueqian},
  title     = {{ReaAct}: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1441--1451},
}
Generalizable Human Gaussian Splatting via Multi-view Semantic Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jingi and Kim, Wonjun},
  title     = {Generalizable Human {Gaussian} Splatting via Multi-view Semantic Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {139--148},
}
FedErase: Personalized Federated Unlearning for Text-to-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Tianyu and Liang, Wenfei and Wang, Sijie and She, Rui and Tay, Wee Peng},
  title     = {{FedErase}: Personalized Federated Unlearning for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4035--4044},
}
WideEye: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Z. Jonny and Paul, Sibendu and Hu, Y. Charlie},
  title     = {{WideEye}: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8618--8628},
}
Organizing Unstructured Image Collections using Natural Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingxuan and Zhong, Zhun and Li, Jun and Franchi, Gianni and Roy, Subhankar and Ricci, Elisa},
  title     = {Organizing Unstructured Image Collections using Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8907--8918},
}
Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity 3D Reconstruction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Mingbo and Yan, Han and Zhang, Bolun and Ran, Wu and Ma, Chao},
  title     = {Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity {3D} Reconstruction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {181--190},
}
MolRecBench-Wild: A Real-World Benchmark for Optical Chemical Structure Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haote and Wang, Hui and Zhu, Chen and Wang, Jingchao and Li, Linye and Lai, Hongbin and Ao, Huijie and Lv, Yongxuan and Wu, Jiang and Sun, Jiaxing and Chen, Lua and Cao, Yuanyuan and Zhang, Ruijie and Lu, Shengxin and Wu, Lijun and Wang, Bin and He, Conghui},
  title     = {{MolRecBench-Wild}: A Real-World Benchmark for Optical Chemical Structure Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1924--1933},
}
SCAIL: Towards Studio-Grade Character Animation via In-Context Learning of 3D-Consistent Pose Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Wenhao and Ye, Sheng and Yang, Zhuoyi and Teng, Jiayan and Dong, ZhenHui and Wen, Kairui and Gu, Xiaotao and Liu, Yong-Jin and Tang, Jie},
  title     = {{SCAIL}: Towards Studio-Grade Character Animation via In-Context Learning of {3D}-Consistent Pose Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4450--4460},
}
Latent Domain Modeling Improves Robustness to Geographic Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Crasto_2026_CVPR, author = {Crasto, Ruth and Rolf, Esther}, title = {Latent Domain Modeling Improves Robustness to Geographic Shifts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2419-2428} }
Zero4D: Training-Free 4D Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jangho and Kwon, Taesung and Ye, Jong Chul}, title = {Zero4D: Training-Free 4D Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4045-4054} }
Uncertainty-Guided Graph Formulation via MWIS for Token Pruning in LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Jouwon and Kim, Sohyeon and Kong, Kyeongbo}, title = {Uncertainty-Guided Graph Formulation via MWIS for Token Pruning in LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9510-9519} }
MADrive: Memory-Augmented Driving Scene Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karpikova_2026_CVPR, author = {Karpikova, Polina and Selikhanovych, Daniil and Struminsky, Kirill and Musaev, Ruslan and Golitsyna, Maria and Baranchuk, Dmitry}, title = {MADrive: Memory-Augmented Driving Scene Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {54-65} }
BiomedHELIX: HiErarchical-Local Interaction eXploration for Biomedical Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Ziheng and Guo, Yuncheng and Xu, Jie and Gu, Xiaodong}, title = {BiomedHELIX: HiErarchical-Local Interaction eXploration for Biomedical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7060-7069} }
INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Madinei_2026_CVPR, author = {Madinei, Parsa and Solgi, Ryan and Wen, Ziqi and Skaza, Jonathan and Eckstein, Miguel and Pedarsani, Ramtin}, title = {INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2947-2956} }
Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Domico_2026_CVPR, author = {Domico, Kyle and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Pauley, Eric and Hanna, Josiah and McDaniel, Patrick}, title = {Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {647-655} }
TAUE: Training-free Noise Transplant and Cultivation Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagai_2026_CVPR, author = {Nagai, Daichi and Morita, Ryugo and Kitada, Shunsuke and Iyatomi, Hitoshi}, title = {TAUE: Training-free Noise Transplant and Cultivation Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3749-3758} }
Instruction-Focus-Prompt: Semantics-Driven Structural Prompts for Universal SAM Segmentation-
[pdf]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Shuqi and Shi, Guangze and Cao, Jiarui and Shi, Aoyuan and Liu, Meilin and Zhang, Xiaoyi and Wang, Yujie and Liu, Xueyu and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang}, title = {Instruction-Focus-Prompt: Semantics-Driven Structural Prompts for Universal SAM Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7514-7519} }
VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Zengjie and Qiu, Jiantao and Bai, Tianyi and Yang, Haojin and Yuan, Binhang and Jing, Qi and He, Conghui and Zhang, Wentao}, title = {VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9846-9855} }
3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Beiyuan and Li, Hesong and Shao, Ruiwen and Fu, Ying}, title = {3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {306-315} }
Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minyoung and Seo, Paul Hongsuck}, title = {Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8877-8886} }
Fast Autoregressive Video Generation with Diagonal Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yang and Guo, Junliang and Wu, Haoyu and He, Tianyu and Pearce, Tim and Rashid, Tabish and Hofmann, Katja and Bian, Jiang}, title = {Fast Autoregressive Video Generation with Diagonal Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4419-4428} }
Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yilin and Luo, Dawei and Chen, Shuai and Xu, Feng and Wang, Jiachi and Feng, Zunlei and Bei, Yijun}, title = {Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2314-2323} }
HAM: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yeqi and Li, Liang and Yang, Zhiwen and Sheng, Xichun and Zhao, Zhidong and Yan, Chenggang}, title = {HAM: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3914-3923} }
PGDM: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jinze and Han, Keyi and Huang, Qiushi and Tian, Jie and Hu, Zhenhua}, title = {PGDM: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5599-5608} }
FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2026_CVPR, author = {Le, Minh Khoa and Do, Kien and Nguyen, Duc Thanh and Tran, Truyen}, title = {FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4234-4244} }
FREESTYLE: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Minseok and Park, Jihun and Gim, Jongmin and Choi, Minwoo and Lee, Kyoungmin and Fioretto, Ferdinando and Im, Sunghoon}, title = {FREESTYLE: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3779-3788} }
Enriching Knowledge Distillation with Cross-Modal Teacher Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mansourian_2026_CVPR, author = {Mansourian, Amir M. and Babaei, Amir Mohammad and Kasaei, Shohreh}, title = {Enriching Knowledge Distillation with Cross-Modal Teacher Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2893-2903} }
SAT: Selective Aggregation Transformer for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Dinh Phu and Do, Thao and Wazir, Saad and Kim, Seongah and Kim, Seon Kwon and Kim, Daeyoung}, title = {SAT: Selective Aggregation Transformer for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4982-4992} }
Video2LoRA: Unified Semantic-Controlled Video Generation via Per-Reference-Video LoRA-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zexi and Li, Baolu and Dai, Jing and Zhang, Yiming and Ma, Yue and Wang, Qinghe and Jia, Xu and Xu, Hongming}, title = {Video2LoRA: Unified Semantic-Controlled Video Generation via Per-Reference-Video LoRA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4202-4212} }
Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zong_2026_CVPR, author = {Zong, Xintao and Liu, Wenxuan and Ding, Jianhao and Yu, Zhaofei and Zhong, Xian and Huang, Tiejun}, title = {Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5137-5146} }
GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wanyu and Shi, Yanzhao and Zheng, Chengxin and Wang, Hua and Wang, Jianing and Zhang, Yue and Yu, Xiaobing and Zhang, Xiaodan}, title = {GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9622-9631} }
Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Fu, Zhouchao and Lu, Wenbin and Zheng, Junjie and Xu, Junnan and Liao, Junjie and Zheng, Jianwei}, title = {Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1546-1555} }
AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuting and Wang, Lan and Zhao, Hengyuan and Huang, Linjiang and Liu, Si and Guo, Qingpei}, title = {AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9205-9214} }
SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Chengshan and Zhang, Pengnian and Zhao, Jinjing}, title = {SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6695-6705} }
MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Zhenhan and Wang, Xuanhan and Jiang, Jiahao and Deng, Kaiyuan and Chen, Pengqi and Li, Shuangle and Liu, Chong and Xu, Xing and Song, Jingkuan and Gao, Lianli and Shen, Heng Tao}, title = {MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1535-1545} }
PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xueheng and Hu, Tao and Cao, Ke and Qi, Runsheng and Zhang, Huixin and Li, Rui and Zhang, Jie and Xie, Chengjun}, title = {PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8826-8835} }
WHOLE: World-Grounded Hand-Object Lifted from Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yufei and Li, Jiaman and Rong, Ryan and Liu, C. Karen}, title = {WHOLE: World-Grounded Hand-Object Lifted from Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3481-3491} }
3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sautter_2026_CVPR, author = {Sautter, Tobias and Dihlmann, Jan-Niklas and Lensch, Hendrik P. A.}, title = {3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {528-537} }
E-GRPO: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shengjun and Zhang, Zhang and Dai, Chensheng and Duan, Yueqi}, title = {E-GRPO: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4429-4439} }
Video Generation Models are Good Latent Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Xiaoyue and Yu, Wenqing and Lian, Jiesong and Jie, Shibo and Zhong, Ruizhe and Liu, Zijun and Zhang, Guozhen and Zhou, Zixiang and Xu, Zhiyong and Zhou, Yuan and Lu, Qinglin and Tang, Fan}, title = {Video Generation Models are Good Latent Reward Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4719-4728} }
GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Beibei and Cao, Xiao and Guo, Jingyuan and Tan, Robby T.}, title = {GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {275-284} }
RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranjan_2026_CVPR, author = {Ranjan, Ravi and Grover, Utkarsh and Lin, Xiaomin and Polyzou, Agoritsa}, title = {RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7998-8008} }
Beyond Single Object: Learning 3D Relations with Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ide_2026_CVPR, author = {Ide, Kohsuke and Yamada, Ryousuke and Qiu, Yue and Ma, Xianzheng and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Satoh, Yutaka}, title = {Beyond Single Object: Learning 3D Relations with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9684-9694} }
Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Sijie and Zhu, Yingying}, title = {Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7615-7624} }
From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cetinkaya_2026_CVPR, author = {{\c{C}}etinkaya, Evren and Lee, Sangmin and Kim, Jung Uk and Lee, Hong Joo and Navab, Nassir}, title = {From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5325-5335} }
Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Zhexi and Wang, Haoran and Yan, Xuerun and Lin, Weimeng and Zhang, Xianhong and Chen, Yongyu and Hu, Jia}, title = {Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {920-930} }
From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarabia_2026_CVPR, author = {Sarabia, Rafael Pablos and Nyborg, Joachim and Birk, Morten and Assent, Ira}, title = {From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2606-2617} }
StereoSpace: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behrens_2026_CVPR, author = {Behrens, Tjark and Obukhov, Anton and Ke, Bingxin and Tosi, Fabio and Poggi, Matteo and Schindler, Konrad}, title = {StereoSpace: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3975-3987} }
Do Vision Models Perceive Illusory Motion in Static Images Like Humans?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosario_2026_CVPR, author = {Rosario, Isabella E. and Cheng, Fan L. and Sun, Zitang and Kriegeskorte, Nikolaus}, title = {Do Vision Models Perceive Illusory Motion in Static Images Like Humans?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5515-5524} }
Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta MRI-
[pdf]
[supp]
[bibtex]@InProceedings{Taymourtash_2026_CVPR, author = {Taymourtash, Athena and Abulnaga, S. Mazdak and Abaci-Turk, Esra and Grant, P. Ellen and Golland, Polina}, title = {Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5285-5294} }
Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2026_CVPR, author = {Yi, Ding and Yu, Yanzhao and Dai, Xili and Qi, Xianbiao and Sun, Peiwen and Wang, Xueqian and Yue, Xiangyu and Wang, Jianan}, title = {Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1346-1357} }
Learning from Label Proportion with Dual-Proportion Constraints-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tianhao and Li, Ximing and Li, Changchun and Guan, Renchu}, title = {Learning from Label Proportion with Dual-Proportion Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7583-7592} }
Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhi_2026_CVPR, author = {Zhi, Helu and Huang, Jingjing and Xu, Wang and Xu, Yangbin and Huang, Yibin and Zhang, Wanyue and Jiang, Baoyang and Deng, Shirui and Zhu, Liang and Li, FangFang and Zhao, Tiejun and Lin, Yankai and Yao, Yuan}, title = {Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9215-9224} }
EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Lulin and Li, Dayou and Liang, Yiqing and Jiang, Sicong and Vijay, Hitesh and Hu, Hezhen and Xu, Xuhai and Liu, Zirui and Shakkottai, Srinivas and Li, Manling and Fan, Zhiwen}, title = {EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2017-2027} }
FUSION: Full-body Unified Motion Prior for Body and Hands Via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duran_2026_CVPR, author = {Duran, Enes and Athanasiou, Nikos and Kocabas, Muhammed and Black, Michael J. and Taheri, Omid}, title = {FUSION: Full-body Unified Motion Prior for Body and Hands Via Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3438-3448} }
Fast-HOI: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Xiaokang and Zhang, Zhizhong and Liu, Yangyuan and Chen, Zhuoran and Zhang, Zhiwei and Ji, Bin and Chen, Mingang and Xie, Yong and Gong, Jingyu and Wang, Xuhong and Tan, Xin and Xie, Yuan}, title = {Fast-HOI: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3719-3728} }
Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Pengfei and Luo, Sifu and Wu, Feng and Zhou, Fan and Zhong, Ting}, title = {Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1148-1158} }
VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Tempfli_2026_CVPR, author = {Tempfli, Levente and Rivera, Esteban and Lienkamp, Markus}, title = {VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {960-969} }
PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Tao and Meng, Jiayang and Chen, Hong and Hou, Chen and Zheng, Guolong and Yang, Xu}, title = {PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8029-8038} }
Gen-n-Val: Agentic Image Data Generation and Validation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jing-En and Fang, I-Sheng and Huang, Tzuhsuan and Liu, Yu-Lun and Wang, Chih-Yu and Chen, Jun-Cheng}, title = {Gen-n-Val: Agentic Image Data Generation and Validation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8786-8795} }
Event-Based Optical Flow Leveraging Precise Event Timing-
[pdf]
[supp]
[bibtex]@InProceedings{Greatorex_2026_CVPR, author = {Greatorex, Hugh and Chicca, Elisabetta}, title = {Event-Based Optical Flow Leveraging Precise Event Timing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3178-3188} }
PBSBench: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuanlong and Chen, Weichi and Rajab, Adrian and Liu, Wenfang and Jin, Yulan and Srisuwananukorn, Andrew and Zhang, Ping}, title = {PBSBench: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5569-5578} }
QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zijun and Wu, Zhengqian and Zhang, Chunjie and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao}, title = {QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8247-8257} }
Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Frahm_2026_CVPR, author = {Frahm, Noah and Patel, Prakrut and Zhang, Yue and Yu, Shoubin and Bansal, Mohit and Sengupta, Roni}, title = {Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3114-3123} }
AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuyuan and Chen, Yuanhong and Wang, Chong and Han, Junlin and Wu, Junde and Peng, Can and Chen, Jingkun and Tian, Yu and Carneiro, Gustavo}, title = {AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7315-7325} }
Conformal Cross-Modal Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Huy Hoang and Jung, C{\'e}dric and Salehi, Shirin and Gl{\"u}ck, Tobias and Schmeink, Anke and Kugi, Andreas}, title = {Conformal Cross-Modal Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5147-5157} }
MPM: Mutual Pair Merging for Efficient Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Rave_2026_CVPR, author = {Rav{\'e}, Simon and Rasti, Pejman and Rousseau, David}, title = {MPM: Mutual Pair Merging for Efficient Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2998-3008} }
HazeMatching: Dehazing Light Microscopy Images with Guided Conditional Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ray_2026_CVPR, author = {Ray, Anirban and Ashesh, Ashesh and Jug, Florian}, title = {HazeMatching: Dehazing Light Microscopy Images with Guided Conditional Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5652-5661} }
Gaussian Shannon: High-Precision Diffusion Model Watermarking Based on Communication-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yi and Huang, Hongbo and Zhang, Liang-Jie}, title = {Gaussian Shannon: High-Precision Diffusion Model Watermarking Based on Communication}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3924-3932} }
Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Nie, Jia and Cheng, Li and Min, Shiquan and Zhu, Jiangping}, title = {Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5818-5827} }
Diffusion^2: Turning 3D Environments into Radio Frequency Heatmaps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Kyoungjun and Yang, Yifan and Ge, Changhan and Qiu, Lili and Jiang, Shiqi}, title = {Diffusion{\textasciicircum}2: Turning 3D Environments into Radio Frequency Heatmaps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6414-6423} }
Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-Earth Gravity and Don't Know Galileo's Principle...for now-
[pdf]
[supp]
[bibtex]@InProceedings{Thozhiyoor_2026_CVPR, author = {Thozhiyoor, Varun Varma and Tripathi, Shivam and Radhakrishnan, Venkatesh Babu and Bhattad, Anand}, title = {Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-Earth Gravity and Don't Know Galileo's Principle...for now}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3830-3839} }
Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiaying and Ye, Shuquan and Xu, Dan and Ouyang, Wanli and Lau, Rynson W. H.}, title = {Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1818-1827} }
One Model for All: Unified Try-On and Try-Off in Any Pose via LLM-Inspired Bidirectional Tweedie Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jinxi and He, Zijian and Wang, Guangrun and Li, Guanbin and Lin, Liang}, title = {One Model for All: Unified Try-On and Try-Off in Any Pose via LLM-Inspired Bidirectional Tweedie Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4310-4320} }
Surgical Procedural Planning as 3D World Modelling: Towards Automated Pulmonary Resection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhen and Dong, Zhaorong and Yang, Xiao and Huang, Liqin and Wu, Qiang and Zeng, Taidui and Zheng, Hanyu and Yang, Mingjing and Zheng, Shaohua and Ding, Wangbin and Pan, Lin}, title = {Surgical Procedural Planning as 3D World Modelling: Towards Automated Pulmonary Resection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5315-5324} }
Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Eun-Ju and Shin, Youjin and Woo, Simon S.}, title = {Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7978-7987} }
AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning-
[pdf]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Qilin and Fan, Qilin and Li, Xinrui and Wang, Tianfu and Qiu, Shuting and Niu, Yue}, title = {AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3019-3028} }
A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Astolfi_2026_CVPR, author = {Astolfi, Giacomo and Bianchi, Matteo and Campi, Riccardo and De Santis, Antonio and Brambilla, Marco}, title = {A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3303-3311} }
Visual2Echo Compositional Contrastive Learning (V2E-CCL): Binaural Knowledge Distilled Network for Depth Prediction-
[pdf]
[bibtex]@InProceedings{Ismail_2026_CVPR, author = {Ismail, Nazrul and Malik, Owais Ahmed and Hong, Ong Wee}, title = {Visual2Echo Compositional Contrastive Learning (V2E-CCL): Binaural Knowledge Distilled Network for Depth Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6019-6028} }
A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yanzhong and Shi, Daming}, title = {A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6528-6537} }
InstructTable: Improving Table Structure Recognition Through Instruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Boming and Wang, Zining and Guo, Zhentao and Liu, Jianqiang and Duan, Chen and Gu, Yu and Zhou, Kai and Yan, Pengfei}, title = {InstructTable: Improving Table Structure Recognition Through Instruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2742-2752} }
Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Wenjie and Li, Jia and Dong, Xin and Tian, Yapeng and Xiang, Yu and Guo, Yunhui}, title = {Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6789-6798} }
Towards Robust Content Watermarking Against Removal and Forgery Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yifan and Wang, Yihan and Gao, Xiao-Shan}, title = {Towards Robust Content Watermarking Against Removal and Forgery Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8059-8069} }
No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Tingyan and Li, Haoyu and Chen, Yihuang and Zhou, Xing and Zhu, Lifei and Wang, XueQian}, title = {No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4108-4117} }
ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Darabi_2026_CVPR, author = {Darabi, Nastaran and Trivedi, Amit Ranjan}, title = {ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9013-9022} }
Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yadang and Liu, Qi and Zhang, Guoqing and Sun, Le and Zheng, Yuhui}, title = {Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7841-7851} }
LoViC: Efficient Long Video Generation with Context Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Jiaxiu and Li, Wenbo and Ren, Jingjing and Qiu, Yuping and Pei, Renjing and Song, Fenglong and Guo, Yong and Xu, Xiaogang and Wu, Han and Zuo, Wangmeng}, title = {LoViC: Efficient Long Video Generation with Context Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4022-4034} }
TalkVid: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shunian and Huang, Hejin and Liu, Yexin and Ye, Zihan and Chen, Pengcheng and Zhu, Chenghao and Guan, Michael and Wang, Rongsheng and Chen, Junying and Hou, Jianye and Li, Bo and Li, Guanbin and Lim, Ser-Nam and Yang, Harry and Wang, Benyou}, title = {TalkVid: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3492-3500} }
LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haichao and Lu, Yao and Wang, Lichen and Li, Yunzhe and Chen, Daiwei and Xu, Yunpeng and Fu, Yun}, title = {LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7111-7121} }
AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jintao and Dong, Bowen and Shi, Weikang and Lei, Chenyang and Zhang, Suiyun and Liu, Rui and Liu, Xihui}, title = {AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1797-1807} }
LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Woo-Jin and Paek, Dong-Hee and Kong, Seung-Hyun}, title = {LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {889-899} }
VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenglin and Chen, Qianglong and Han, Feng and Wang, Yikun and Yin, Xingxi and Gong, Yan and Li, Ruilin and Zhang, Yin and Wang, Jiaqi}, title = {VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8226-8236} }
EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Runze and Zhai, Yuwen and Xu, Bo and Xu, Liwu and Shi, Nian and Zhang, Wei and Lin, Ran and Wang, Liang}, title = {EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9347-9356} }
Loom: Diffusion-Transformer for Interleaved Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Mingcheng and Liu, Jiaming and Song, Yiren}, title = {Loom: Diffusion-Transformer for Interleaved Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4582-4592} }
Concept Erasure via Attention Redirection-
[pdf]
[supp]
[bibtex]@InProceedings{Schechter_2026_CVPR, author = {Schechter, Amit and Gal, Rinon and Kedem, Ofir and Chechik, Gal and Cohen-Or, Daniel}, title = {Concept Erasure via Attention Redirection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4572-4581} }
FIRE-CIR: Fine-grained Reasoning for Composed Fashion Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Garderes_2026_CVPR, author = {Gard{\`e}res, Fran{\c{c}}ois and Gauthier, Camille-Sovanneary and Ponce, Jean and Chen, Shizhe}, title = {FIRE-CIR: Fine-grained Reasoning for Composed Fashion Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5694-5703} }
BridgeDiffusion: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2026_CVPR, author = {Huo, Yufei and Li, Ao and Dai, Wenxun and Wu, Songli and Tang, Yansong}, title = {BridgeDiffusion: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3449-3459} }
DINO-VO: Learning Where to Focus for Enhanced State Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Qi and Li, Guanghao and Hu, Sijia and Gao, Xin and Ma, Junpeng and Xue, Xiangyang and Pu, Jian}, title = {DINO-VO: Learning Where to Focus for Enhanced State Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1556-1566} }
VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Minghong and Wang, Qiulin and Ye, Zongli and Liu, Wenze and Liu, Quande and Ye, Weicai and Wang, Xintao and Wan, Pengfei and Gai, Kun and Yue, Xiangyu}, title = {VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4475-4485} }
V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zixu and Hu, Jian and Liu, Ziquan and Si, Chenyang and Li, Wei and Gong, Shaogang}, title = {V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9155-9164} }
GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tanay_2026_CVPR, author = {Tanay, Thomas and Brahimi, Mohammed and Nazarczuk, Michal and Zhang, Qingwen and Catley-Chandar, Sibi and Moreau, Arthur and Zhang, Zhensong and P{\'e}rez-Pellitero, Eduardo}, title = {GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {348-359} }
Fashion130K: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yu and Zhu, Ting and Liu, Yichun and Ma, Lichen and Shan, Xinyuan and Fu, Jingling and Shi, Yu and Huang, Junshi and Li, Yan}, title = {Fashion130K: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4769-4779} }
PoseGen: In-Context LoRA Finetuning for Pose-Controllable Long Human Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Jingxuan and Su, Busheng and Wong, Finn}, title = {PoseGen: In-Context LoRA Finetuning for Pose-Controllable Long Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4780-4789} }
Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Naman Deep and Croce, Francesco and Hein, Matthias}, title = {Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6164-6173} }
The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Yadav_2026_CVPR, author = {Yadav, Garima Arya and Yilmaz, Nilay and Yang, Yezhou}, title = {The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2069-2078} }
Vision-Language Models for Automated 3D PET/CT Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Wenpei and Yan, Ke and Zhang, Jiajin and Jin, Dakai and Xie, Zhaoheng}, title = {Vision-Language Models for Automated 3D PET/CT Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5295-5304} }
SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Kang and Xiang, Wei and Yu, Lu and Wyatt, Mathew and Liu, Gaowen and Kompella, Ramana Rao}, title = {SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {12-21} }
Temporally Consistent Long-Term Memory for 3D Single Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoo_2026_CVPR, author = {Yoo, Jaejoon and Lee, SuBeen and Jeon, Yerim and Lee, Miso and Heo, Jae-Pil}, title = {Temporally Consistent Long-Term Memory for 3D Single Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8388-8397} }
SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Zhimin and Yadav, Abhay and Chellappa, Rama and Peng, Cheng}, title = {SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {253-263} }
Animated-ART: Multi-Layer Transparent Video Generation-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ziqiang and Wang, Yunnan and Chen, Dong and Dong, Yue and Li, Ji and Yuan, Yuhui and Jin, Xin}, title = {Animated-ART: Multi-Layer Transparent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4150-4159} }
Why MLLMs Struggle to Determine Object Orientations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gopinath_2026_CVPR, author = {Gopinath, Anju and Krishnaswamy, Nikhil and Draper, Bruce}, title = {Why MLLMs Struggle to Determine Object Orientations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9836-9845} }
GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light & Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiaye and Hadadan, Saeed and Lin, Geng and Tu, Peihan and Zwicker, Matthias and Jacobs, David and Sengupta, Roni}, title = {GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light \& Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6445-6455} }
PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Appelle_2026_CVPR, author = {Appelle, Aaron and Lynch, Jerome P.}, title = {PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4461-4474} }
Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tissera_2026_CVPR, author = {Tissera, Dumindu and Awadallah, Omar and Danish, Muhammad Umair and Sadhu, Ayan and Grolinger, Katarina}, title = {Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2429-2439} }
Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation-
[pdf]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Zhibin and Gao, Zhiqiang and Sun, Mingjie and Wu, Yupei and Fu, Guohong and Yi, Ran}, title = {Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4170-4179} }
ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Tariq_2026_CVPR, author = {Tariq, Abdullah and Saleem, Bisma and Azad, R. Muhammad Atif and Masek, Martin and Gilani, Syed Zulqarnain}, title = {ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6548-6559} }
ColorMam: Color-Aware State Space Model for Image Color Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jian and Peng, Jiaxin and Li, Yuchen and Zhou, Siwang}, title = {ColorMam: Color-Aware State Space Model for Image Color Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4379-4388} }
Mull-Tokens: Modality-Agnostic Latent Thinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ray_2026_CVPR, author = {Ray, Arijit and Abdelkader, Ahmed and Mao, Chengzhi and Plummer, Bryan A. and Saenko, Kate and Krishna, Ranjay and Guibas, Leonidas and Chu, Wen-Sheng}, title = {Mull-Tokens: Modality-Agnostic Latent Thinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9477-9488} }
RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alama_2026_CVPR, author = {Alama, Omar and Jariwala, Darshil and Bhattacharya, Avigyan and Kim, Seungchan and Wang, Wenshan and Scherer, Sebastian}, title = {RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9294-9304} }
Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yubo and Wang, Weiqiang and Zhao, Sirui and Xu, Tong and Liu, Lin and Chen, Enhong}, title = {Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4440-4449} }
Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2026_CVPR, author = {Sharma, Agniv and Xie, Xianghui and Fischer, Tom and Ilg, Eddy and Pons-Moll, Gerard}, title = {Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3405-3416} }
ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Junhao and Zeng, Deyu and Pang, Junhao and Li, Lini and Zhong, Xiaopin and Wu, Zongze}, title = {ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {295-305} }
UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vishal_2026_CVPR, author = {Vishal, Joseph Raj and Poluri, Nagasiri and Naik, Katha and Patil, Rutuja and Kota, Kashyap Hegde and Vinod, Krishna and Ramesh, Prithvi Jai and Farhadi, Mohammad and Yang, Yezhou and Chakravarthi, Bharatesh}, title = {UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1862-1871} }
Unbiased Dynamic Multimodal Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Shicai and Zhang, Kaijie and Chen, Luyi and He, Tao and Duan, Guiduo}, title = {Unbiased Dynamic Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6239-6249} }
Future Optical Flow Prediction Improves Robot Control and Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ranasinghe_2026_CVPR, author = {Ranasinghe, Kanchana and Zhou, Honglu and Fang, Yu and Yang, Luyu and Xue, Le and Xu, Ran and Xiong, Caiming and Savarese, Silvio and Ryoo, Michael S. and Niebles, Juan Carlos}, title = {Future Optical Flow Prediction Improves Robot Control and Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4528-4540} }
ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiohara_2026_CVPR, author = {Shiohara, Kaede and Yamasaki, Toshihiko and Golyanik, Vladislav}, title = {ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3665-3676} }
SPHINX: A Synthetic Environment for Visual Perception and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alam_2026_CVPR, author = {Alam, Md Tanvirul and Aggarwal, Saksham and Chae, Justin Yang and Rastogi, Nidhi}, title = {SPHINX: A Synthetic Environment for Visual Perception and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9489-9499} }
MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhiyu and Zhou, Zewei and Cai, Tianhui and Zhang, Yun and Ma, Jiaqi}, title = {MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {878-888} }
OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shipard_2026_CVPR, author = {Shipard, Jordan and Wiliem, Arnold and Thanh, Kien Nguyen and Xiang, Wei and Fookes, Clinton}, title = {OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6768-6778} }
OminiControl2: Efficient Conditioning for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Zhenxiong and Xue, Qiaochu and Yang, Xingyi and Liu, Songhua and Wang, Xinchao}, title = {OminiControl2: Efficient Conditioning for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4256-4265} }
Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Woojun and Go, Jaehoon and Jeon, Mingyu and Yoon, Sunjae and Kim, Junyeong}, title = {Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8962-8971} }
HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Zhinan and Wang, Peisong and Qiu, Shuang and Cheng, Jian}, title = {HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8952-8961} }
MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zhilin and Zhang, Zhihui and Sun, Shiliang and Zhao, Jing and Yang, Hao}, title = {MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2089-2099} }
Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, In{\`e}s Hyeonsu and Jin, Woojeong and Son, Soowon and Seo, Junyoung and Cho, Seokju and Baek, JeongYeol and Lee, Byeongwon and Lee, JoungBin and Kim, Seungryong}, title = {Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8640-8650} }
Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Jianhang and Cheng, Zhiming and Zhao, Jianxiang and Ma, Bingtao and Chen, Hao and Gao, Yuhan and Zhang, Lian and Ying, Zuobin and Wang, Shuai}, title = {Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7799-7809} }
Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chengzhi and Yang, Yuzhe and Fan, Yue and Wei, Qingyue and Liu, Sheng and Wang, Xin Eric}, title = {Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9225-9236} }
ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation-
[pdf]
[bibtex]@InProceedings{Nian_2026_CVPR, author = {Nian, Bingkun and Tang, Fenghe and Ning, Zhiwei and Jiang, Dongsheng and Li, Yin and Yang, Jie and Xiao, Rong and Zhou, Shaohua Kevin and Liu, Wei}, title = {ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6371-6381} }
SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ruibin and Lin, Zhenyu and Zhao, Xinhai}, title = {SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8796-8805} }
Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Wouladje_2026_CVPR, author = {Wouladje, Cabrel and Mumanikidzwa, Golden Tendekai and Islam, Md Apon and Xu, Huiying and Li, Hongbo and Tan, Wenzhe and Chen, Zhendong and Zhu, Xinzhong}, title = {Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1567-1576} }
Instant Colorization of Gaussian Splats-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lieber_2026_CVPR, author = {Lieber, Daniel and Mock, Alexander and Wandel, Nils}, title = {Instant Colorization of Gaussian Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {170-180} }
Low-Bitrate Video Compression through Semantic-Conditioned Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lingdong and Su, Guan-Ming and Kothandaraman, Divya and Huang, Tsung-Wei and Hajiesmaili, Mohammad and Sitaraman, Ramesh K.}, title = {Low-Bitrate Video Compression through Semantic-Conditioned Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4495-4505} }
Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yibing and Katakol, Sudeep and Brack, Manuel and Lin, Jinhong and Bai, Haoyue and Li, Yu-Teng and Zhang, Richard and Shechtman, Eli and Ravi, Hareesh and Kale, Ajinkya}, title = {Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1883-1892} }
Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chenwei and Shuai, Zitao and Shen, Liyue}, title = {Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5641-5651} }
Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hongli and Wang, Yu and Zhao, Shengjie}, title = {Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7419-7428} }
Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wang, Qian and Song, Yang}, title = {Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6476-6485} }
A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Demirel_2026_CVPR, author = {Demirel, Mehmet and Kyrkou, Christos}, title = {A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7541-7551} }
Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Huaiyuan and Yang, Muli and Goenawan, Gabriel James and Wang, Kai and Wang, Zheng and Hu, Peng and Peng, Xi and Zhu, Hongyuan}, title = {Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3029-3039} }
Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Xiangjun and Zhang, Zhensong and Chen, Dave Zhenyu and Xu, Songcen and Quan, Long and P\'erez-Pellitero, Eduardo and Jang, Youngkyoon}, title = {Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7154-7164} }
On the Group Disparities Arising from Machine Unlearning-
[pdf]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Zijie and Ying, Zuobin and Wang, Yajie and Zhu, Liehuang and Zhou, Wanlei}, title = {On the Group Disparities Arising from Machine Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8133-8142} }
Language-Augmented Semantic Priors for B-Spline Surface Fitting-
[pdf]
[supp]
[bibtex]@InProceedings{Lou_2026_CVPR, author = {Lou, Yunzhong and Luo, Yusheng and Li, Jiahao and Song, Yu and Zhou, Xiangdong}, title = {Language-Augmented Semantic Priors for B-Spline Surface Fitting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9120-9130} }
FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Jingchen and Zhang, Quan and Jiang, Dan and Lv, Keyu and Zhang, Ke and Yuan, Chun}, title = {FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7439-7449} }
Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Zhenghui and Man, Yuanbin and Sheng, Junyuan and Lin, Bowen and Ahmed, Ahmed and Jiang, Bo and Zhang, Boyuan and Yin, Miao and Jin, Sian and Gnawali, Omprakash and Zhang, Chengming}, title = {Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3060-3069} }
Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Costanzino_2026_CVPR, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8816-8825} }
LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yilong and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter}, title = {LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {191-201} }
Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Xinya and Yang, Bo and Cao, Ying}, title = {Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4729-4738} }
COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Can and Li, Ruirui}, title = {COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6154-6163} }
Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiao and Tan, Guangshuang and Hu, Jie and Kan, Shichao and Jiang, Bing and Liang, Yixiong}, title = {Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5190-5200} }
World Model Robustness via Surprise Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zollicoffer_2026_CVPR, author = {Zollicoffer, Geigh and Chopra, Tanush and Yan, Mingkuan and Ma, Xiaoxu and Eaton, Kenneth and Riedl, Mark}, title = {World Model Robustness via Surprise Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3146-3155} }
CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Boote_2026_CVPR, author = {Boote, Bikram and Kim, Junho and Kara, Ozgur and Lee, Sangmin and Rehg, James M}, title = {CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3417-3427} }
U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Smith_2026_CVPR, author = {Smith, Michael and Ferrie, Frank P.}, title = {U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1850-1861} }
Learning Spatial-Preserving Hierarchical Representations for Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Weiyi and Diao, Xingjian and Zhang, Chunhui and Gao, Chongyang and Xu, Xinwen and Li, Siting and Gui, Jiang}, title = {Learning Spatial-Preserving Hierarchical Representations for Digital Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5484-5494} }
Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuecheng and Jia, Weikuan and Zheng, Yuanjie}, title = {Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6040-6050} }
Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Younggun and Swetha, Sirnam and Kagdi, Fazil and Shah, Mubarak}, title = {Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2100-2110} }
RectifiedHR: Enable Efficient High-Resolution Synthesis via Energy Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhen and Shen, Guibao and Li, Minyang and Hou, Liang and Liu, Mushui and Wang, Luozhou and Tao, Xin and Chen, Ying-Cong}, title = {RectifiedHR: Enable Efficient High-Resolution Synthesis via Energy Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3809-3819} }
PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jinkai and Zhang, Zhenliang and Fan, Lifeng and Wang, Wei}, title = {PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1976-1985} }
MOSSTrack: Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yisong and Yao, He and Cheng, Junlong and Lu, Yujie and Bai, Junqi and Zhu, Min}, title = {MOSSTrack: Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8378-8387} }
Video Reasoning Without Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sridhar_2026_CVPR, author = {Sridhar, Deepak and Bhardwaj, Kartikeya and Jeyaraj, Jeya Pradha and Vasconcelos, Nuno and Nayak, Ankita and Teague, Harris}, title = {Video Reasoning Without Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6250-6260} }
LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chowdhury_2026_CVPR, author = {Chowdhury, Md Abtahi Majeed and Rahman, Md Rifat Ur and Taki, Akil Ahmad}, title = {LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1640-1649} }
Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haihao and Dong, Siwei and Li, Jianing and Zhao, Rui and Zhang, Yunjian and Qin, Geng and Zhu, Lin}, title = {Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3220-3230} }
CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Qiyu and Qu, Zhen and Luo, Wei and Yao, Haiming and Cao, Yunkang and Jiang, Yuxin and Duan, Yinan and Luo, Huiyuan and Lv, Chengkan and Zhang, Zhengtao}, title = {CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8554-8563} }
Test-Time Distillation for Continual Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiao and Huang, Jiazhen and Liu, Zhiming and Jiang, Qinting and Huang, Fanding and Jiang, Jingyan and Wang, Zhi}, title = {Test-Time Distillation for Continual Model Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7593-7604} }
Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiu_2026_CVPR, author = {Xiu, Yanming and Jiang, Zhengyuan and Gong, Neil Zhenqiang and Gorlatova, Maria}, title = {Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9110-9119} }
Disrupting Positional Encoding for Effective Open Set Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yu and Xie, Jiabo and Zhou, Yucan and Mu, Junxian and Hu, Qinghua and Zhu, Pengfei}, title = {Disrupting Positional Encoding for Effective Open Set Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6633-6642} }
Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jianglin and Wu, Yuanwei and Zhao, Ziyi and Wang, Hongcheng and Jimenez, Felix and Majeedi, Abrar and Fu, Yun}, title = {Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8629-8639} }
Towards Source-Aware Object Swapping with Initial Noise Perturbation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Jiahui and Sun, Xianbing and Zhu, Xiangnan and Ji, Yikun and Liu, Ruitong and Zhang, Liqing and Zhang, Jianfu}, title = {Towards Source-Aware Object Swapping with Initial Noise Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4400-4409} }
SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krukowski_2026_CVPR, author = {Krukowski, Patryk and Gorczyca, Lukasz and Helm, Piotr and Ksiazek, Kamil and Spurek, Przemyslaw}, title = {SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2377-2386} }
MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinying and Jing, Junfeng and Wu, Tong and Gao, Tian and Sheng, Zhihong}, title = {MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2585-2595} }
Bi-Level Optimization for Single Domain Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Heidari_2026_CVPR, author = {Heidari, Marzi and Zhang, Hanping and Yan, Hao and Guo, Yuhong}, title = {Bi-Level Optimization for Single Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6685-6694} }
NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Pengcheng and Hu, Yue and Li, Wenhao and Gunderson, Nicole M and Feng, Andrew and Sun, Zhenglong and Beerel, Peter and Seibel, Eric J}, title = {NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {403-413} }
FLToM: Robust Federated Learning with Theory-of-Mind Structure-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Tianshu and Yang, Liu and Guo, Sichang and Wang, Qilong and Hu, Qinghua}, title = {FLToM: Robust Federated Learning with Theory-of-Mind Structure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2503-2513} }
TalkVerse: Democratizing Minute-Long Audio-Driven Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhenzhi and Wang, Jian and Ma, Ke and Lin, Dahua and Zhou, Bing}, title = {TalkVerse: Democratizing Minute-Long Audio-Driven Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4516-4527} }
RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baltaxe_2026_CVPR, author = {Baltaxe, Michael and Levi, Dan and Benaim, Sagie}, title = {RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {558-568} }
EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xinan and Irshad, Muhammad Zubair and Yezzi, Anthony and Tsai, Yi-Chang and Kira, Zsolt}, title = {EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8846-8856} }
G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Chao and Yokoya, Naoto}, title = {G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {518-527} }
Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singhi_2026_CVPR, author = {Singhi, Nishad and Bialas, Christian and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia and Rohrbach, Marcus and Rohrbach, Anna}, title = {Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3124-3135} }
Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Mingyu and Han, Woo Kyoung and Im, Sunghoon and Jin, Kyong Hwan}, title = {Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4950-4960} }
Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dongfang_2026_CVPR, author = {Dongfang, Zihao and Zheng, Xu and Weng, Ziqiao and Lyu, Yuanhuiyi and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun and Hu, Xuming}, title = {Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9759-9769} }
RodNet: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Boheng and Li, Ziyu and Zhang, Zhong and Xu, Mengrui and Duan, Chenghua and Liu, Dehao and Li, Qing and Wu, Xia}, title = {RodNet: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4961-4970} }
SCOPE: Spatially Ordered Continual Learning for 3D Segmentation-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Wenhao and Zhang, Huaidong and Zhang, Weipeng and Zhang, Qianle and He, Shengfeng}, title = {SCOPE: Spatially Ordered Continual Learning for 3D Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7862-7871} }
Back

