CVPR 2026 Open Access Repository

Findings

Back
Revisiting Real-Time Detection Transformer with Efficient Encoder Design: Jiannan Huang,

Aditya Kane,

Fengzhe Zhou,

Yunchao Wei,

Humphrey Shi; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiannan and Kane, Aditya and Zhou, Fengzhe and Wei, Yunchao and Shi, Humphrey},
  title     = {Revisiting Real-Time Detection Transformer with Efficient Encoder Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6859--6868},
}
Spatial Transcriptomics as Images for Large-Scale Pretraining: Yishun Zhu,

Jiaxin Qi,

Jian Wang,

Yuhua Zheng,

Jianqiang Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yishun and Qi, Jiaxin and Wang, Jian and Zheng, Yuhua and Huang, Jianqiang},
  title     = {Spatial Transcriptomics as Images for Large-Scale Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1191--1200},
}
GEAR: GEometry-Motion Alternating Refinement for Articulated Object Modeling with Gaussian Splatting: Jialin Li,

Bin Fu,

Ruiping Wang,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jialin and Fu, Bin and Wang, Ruiping and Chen, Xilin},
  title     = {{GEAR}: {GEometry-Motion} Alternating Refinement for Articulated Object Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {264--274},
}
Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models: Hoin Jung,

Shenyu Lu,

De Wang,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Jung_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hoin and Lu, Shenyu and Wang, De and Wang, Xiaoqian},
  title     = {Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7956--7967},
}
Mix-to-Max: Optimizing Data Mixtures for Peak Vision-Language Efficiency: Erwei Zhao,

Haijin Zeng,

Weiwei Xiao,

Shijie Cao,

Qiben Shan,

Shaocong Wu,

Jingyong Su,

Jie Liu; [pdf]
[bibtex]
% NOTE(review): citation key Zhao_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Erwei and Zeng, Haijin and Xiao, Weiwei and Cao, Shijie and Shan, Qiben and Wu, Shaocong and Su, Jingyong and Liu, Jie},
  title     = {{Mix-to-Max}: Optimizing Data Mixtures for Peak Vision-Language Efficiency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2937--2946},
}
AdaPerceiver: Transformers with Adaptive Width, Depth, and Tokens: Purvish Jajal,

Nicholas John Eliopoulos,

Benjamin Shiue-Hal Chou,

George K Thiruvathukal,

Yung-Hsiang Lu,

James C. Davis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jajal_2026_CVPR,
  author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K and Lu, Yung-Hsiang and Davis, James C.},
  title     = {{AdaPerceiver}: Transformers with Adaptive Width, Depth, and Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2618--2628},
}
Eigen-Value: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach: Youngjun Choi,

Joonseong Kang,

Sungjun Lim,

Kyungwoo Song; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Choi_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Youngjun and Kang, Joonseong and Lim, Sungjun and Song, Kyungwoo},
  title     = {{Eigen-Value}: Efficient Domain-Robust Data Valuation Via Eigenvalue-Based Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2387--2397},
}
CoTFly: Making UAVs Think Where to Fly Next Through Visual Chain-of-Thought Reasoning: Meiqi Wang,

Longnyu Xu,

Jun Liu,

Hewu Li,

Han Qiu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meiqi and Xu, Longnyu and Liu, Jun and Li, Hewu and Qiu, Han},
  title     = {{CoTFly}: Making {UAVs} Think Where to Fly Next Through Visual Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1482--1491},
}
Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation: Shristi Das Biswas,

Arani Roy,

Kaushik Roy; [pdf] [supp]
[bibtex]
@InProceedings{Das_Biswas_2026_CVPR,
  author    = {Das Biswas, Shristi and Roy, Arani and Roy, Kaushik},
  title     = {Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7903--7913},
}
AD-R1: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models: Tianyi Yan,

Tao Tang,

Xingtai Gui,

Yongkang Li,

Jiasen Zheng,

Weiyao Huang,

Lingdong Kong,

Wencheng Han,

Xia Zhou,

Xueyang Zhang,

Yifei Zhan,

Kun Zhan,

Cheng-zhong Xu,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Tianyi and Tang, Tao and Gui, Xingtai and Li, Yongkang and Zheng, Jiasen and Huang, Weiyao and Kong, Lingdong and Han, Wencheng and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{AD-R1}: Closed-Loop Reinforcement Learning for End-to-End Autonomous Driving with Impartial World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1085--1095},
}
Towards Generalization of Scene Text Tampering Localization via Causal Invariance: Huiru Shao,

Bin Dong,

Kaizhu Huang,

Xiaowei Huang,

Qiufeng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Huiru and Dong, Bin and Huang, Kaizhu and Huang, Xiaowei and Wang, Qiufeng},
  title     = {Towards Generalization of Scene Text Tampering Localization via Causal Invariance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7262--7271},
}
TriGuard-FL: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation: K Naveen Kumar,

Mohsen Guizani; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, K Naveen and Guizani, Mohsen},
  title     = {{TriGuard-FL}: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7945--7955},
}
CodePlot-CoT: Mathematical Visual Reasoning by Thinking with Code-Driven Images: Chengqi Duan,

Kaiyue Sun,

Rongyao Fang,

Manyuan Zhang,

Yan Feng,

Ying Luo,

Yufang Liu,

Ke Wang,

Peng Pei,

Xunliang Cai,

Hongsheng Li,

Yi Ma,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Chengqi and Sun, Kaiyue and Fang, Rongyao and Zhang, Manyuan and Feng, Yan and Luo, Ying and Liu, Yufang and Wang, Ke and Pei, Peng and Cai, Xunliang and Li, Hongsheng and Ma, Yi and Liu, Xihui},
  title     = {{CodePlot-CoT}: Mathematical Visual Reasoning by Thinking with Code-Driven Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9586--9596},
}
Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering: Paritosh Parmar,

Eric Peh,

Basura Fernando; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parmar_2026_CVPR,
  author    = {Parmar, Paritosh and Peh, Eric and Fernando, Basura},
  title     = {Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5860--5870},
}
What Matters for Scalable and Robust Learning in End-to-End Driving Planners?: David Holtz,

Niklas Hanselmann,

Simon Doll,

Marius Cordts,

Bernt Schiele; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Holtz_2026_CVPR,
  author    = {Holtz, David and Hanselmann, Niklas and Doll, Simon and Cordts, Marius and Schiele, Bernt},
  title     = {What Matters for Scalable and Robust Learning in End-to-End Driving Planners?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {931--941},
}
Optimal-Transport-based Feature Alignment for Multimodal Change Detection: Mengqi Huang,

Jun Liu,

Li Cui,

Yuping Duan,

Faqiang Wang; [pdf]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Mengqi and Liu, Jun and Cui, Li and Duan, Yuping and Wang, Faqiang},
  title     = {Optimal-Transport-based Feature Alignment for Multimodal Change Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6342--6351},
}
Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas: Felix Wimbauer,

Fabian Manhardt,

Michael Oechsle,

Nikolai Kalischek,

Christian Rupprecht,

Daniel Cremers,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wimbauer_2026_CVPR,
  author    = {Wimbauer, Felix and Manhardt, Fabian and Oechsle, Michael and Kalischek, Nikolai and Rupprecht, Christian and Cremers, Daniel and Tombari, Federico},
  title     = {Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4541--4551},
}
Active Exploration for Sparse Visual Localization: Johanna Lidholm,

Ludvig Dillén,

Zuzana Kukelova,

Torsten Sattler,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Lidholm_2026_CVPR,
  author    = {Lidholm, Johanna and Dill{\'e}n, Ludvig and Kukelova, Zuzana and Sattler, Torsten and Larsson, Viktor},
  title     = {Active Exploration for Sparse Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {338--347},
}
IDSplat: Instance-Decomposed 3D Gaussian Splatting for Driving Scenes: Carl Lindström,

Mahan Rafidashti,

Maryam Fatemi,

Lars Hammarstrand,

Martin R. Oswald,

Lennart Svensson; [pdf] [supp]
[bibtex]
@InProceedings{Lindstrom_2026_CVPR,
  author    = {Lindstr{\"o}m, Carl and Rafidashti, Mahan and Fatemi, Maryam and Hammarstrand, Lars and Oswald, Martin R. and Svensson, Lennart},
  title     = {{IDSplat}: Instance-Decomposed {3D} {Gaussian} Splatting for Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {316--326},
}
MapGPT: A Vision-Language Model for Large-Scale High-Definition Map Generation: Mengxi Wu,

Long Zhou,

Zhixia Li,

Adrian Kwan,

Denis Laprise,

Hengyi Huang,

Xiaqing Wu,

Shuang Wu; [pdf]
[bibtex]
% NOTE(review): citation key Wu_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mengxi and Zhou, Long and Li, Zhixia and Kwan, Adrian and Laprise, Denis and Huang, Hengyi and Wu, Xiaqing and Wu, Shuang},
  title     = {{MapGPT}: A Vision-Language Model for Large-Scale High-Definition Map Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {990--999},
}
SPOT: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers: Oded Schlesinger,

Amirhossein Farzam,

J. Matias Di Martino,

Guillermo Sapiro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schlesinger_2026_CVPR,
  author    = {Schlesinger, Oded and Farzam, Amirhossein and Di Martino, J. Matias and Sapiro, Guillermo},
  title     = {{SPOT}: Sparsification with Attention Dynamics via Token Relevance in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2335--2345},
}
CPUBone: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities: Moritz Nottebaum,

Matteo Dunnhofer,

Christian Micheloni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nottebaum_2026_CVPR,
  author    = {Nottebaum, Moritz and Dunnhofer, Matteo and Micheloni, Christian},
  title     = {{CPUBone}: Efficient Vision Backbone Design for Devices with Low Parallelization Capabilities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2493--2502},
}
Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis: Aimei Dong,

Yongxing Cai,

Bin Liu,

Jiale Sun,

Guixin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Aimei and Cai, Yongxing and Liu, Bin and Sun, Jiale and Zhao, Guixin},
  title     = {Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5430--5439},
}
How to Achieve Prototypical Birth and Death for OOD Detection?: Ningkang Peng,

Qianfeng Yu,

Xiaoqian Peng,

Linjing Qian,

Yafei Liu,

Canran Xiao,

Xinyu Lu,

Tingyu Lu,

Zhichao Zheng,

Yanhui Gu; [pdf] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ningkang and Yu, Qianfeng and Peng, Xiaoqian and Qian, Linjing and Liu, Yafei and Xiao, Canran and Lu, Xinyu and Lu, Tingyu and Zheng, Zhichao and Gu, Yanhui},
  title     = {How to Achieve Prototypical Birth and Death for {OOD} Detection?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6466--6475},
}
2D Triangle Splatting for Direct Differentiable Mesh Training: Kaifeng Sheng,

Zheng Zhou,

Yingliang Peng,

Qianwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Kaifeng and Zhou, Zheng and Peng, Yingliang and Wang, Qianwei},
  title     = {{2D} Triangle Splatting for Direct Differentiable Mesh Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {285--294},
}
TextBind: Your Vision-Language Models are Naturally Unified Multimodal Models: Xu Ma,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xu and Fu, Yun},
  title     = {{TextBind}: Your Vision-Language Models are Naturally Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6029--6039},
}
DrawingVQA: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings: Yoonhwa Jung,

Junryu Fu,

Mani Golparvar-Fard; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Jung_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Yoonhwa and Fu, Junryu and Golparvar-Fard, Mani},
  title     = {{DrawingVQA}: A Real-World Benchmark for Multi-Depth Visual-Textual Reasoning on Construction Drawings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2121--2130},
}
Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather: Nikhil Kumar Jangamreddy,

Mahsa Baktashmotlagh,

Chetan Arora; [pdf] [supp]
[bibtex]
@InProceedings{Jangamreddy_2026_CVPR,
  author    = {Jangamreddy, Nikhil Kumar and Baktashmotlagh, Mahsa and Arora, Chetan},
  title     = {Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7686--7696},
}
Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models: Yujun Tong,

Dongliang Chang,

Zijin Yin,

Xintong Liu,

Yuanchen Fang,

Zhanyu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Yujun and Chang, Dongliang and Yin, Zijin and Liu, Xintong and Fang, Yuanchen and Ma, Zhanyu},
  title     = {Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6976--6986},
}
VirPro: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular 3D Detection: Chupeng Liu,

Jiyong Rao,

Shangquan Sun,

Runkai Zhao,

Weidong Cai; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is also used by other entries in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chupeng and Rao, Jiyong and Sun, Shangquan and Zhao, Runkai and Cai, Weidong},
  title     = {{VirPro}: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7530--7540},
}
Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning: Siqi Yang,

Zilve Gao,

Haibo Qiu,

Fanfan Liu,

Peng Shi,

Zhixiong Zeng,

Qingmin Liao,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Siqi and Gao, Zilve and Qiu, Haibo and Liu, Fanfan and Shi, Peng and Zeng, Zhixiong and Liao, Qingmin and Ma, Lin},
  title     = {Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9738--9748},
}
Decoupled Scale-wise Autoregressive Modeling for Visual Generation: Sucheng Ren,

Yaodong Yu,

Nataniel Ruiz,

Feng Wang,

Cihang Xie; [pdf]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Sucheng and Yu, Yaodong and Ruiz, Nataniel and Wang, Feng and Xie, Cihang},
  title     = {Decoupled Scale-wise Autoregressive Modeling for Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4506--4515},
}
Debiased One-Shot NAS Via Density-Aware Sampling: Mehraveh Javan Roshtkhari,

Matthew Toews,

Marco Pedersoli; [pdf] [supp]
[bibtex]
@InProceedings{Roshtkhari_2026_CVPR,
  author    = {Roshtkhari, Mehraveh Javan and Toews, Matthew and Pedersoli, Marco},
  title     = {Debiased One-Shot {NAS} Via Density-Aware Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2357--2366},
}
Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline: Qizhi Xie,

Kun Yuan,

Yunpeng Qu,

Ming Sun,

Chao Zhou,

Jihong Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Sun, Ming and Zhou, Chao and Zhu, Jihong},
  title     = {Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4821--4831},
}
Turning Generators into Retrievers: Unlocking MLLMs for Natural Language-Guided Geo-Localization: Yuqi Chen,

Xiaohan Zhang,

Ahmad Arrabi,

Waqas Sultani,

Chen Chen,

Safwan Wshah; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuqi and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan},
  title     = {Turning Generators into Retrievers: Unlocking {MLLMs} for Natural Language-Guided Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6331--6341},
}
SLAD : Shared LoRA Adapters for Task Specific Distillation: Reda Bensaid,

Yassir Bendou,

Vincent Gripon,

François Leduc-Primeau; [pdf] [supp]
[bibtex]
@InProceedings{Bensaid_2026_CVPR,
  author    = {Bensaid, Reda and Bendou, Yassir and Gripon, Vincent and Leduc-Primeau, Fran{\c{c}}ois},
  title     = {{SLAD} : Shared {LoRA} Adapters for Task Specific Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2968--2977},
}
QuPAINT: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery: Xuan Bac Nguyen,

Hoang-Quan Nguyen,

Sankalp Pandey,

Tim Faltermeier,

Nicholas Borys,

Hugh Churchill,

Khoa Luu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Xuan Bac and Nguyen, Hoang-Quan and Pandey, Sankalp and Faltermeier, Tim and Borys, Nicholas and Churchill, Hugh and Luu, Khoa},
  title     = {{QuPAINT}: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8684--8694},
}
DocSLM: A Small Vision-Language Model for Long Multimodal Document Understanding: Tanveer Hannan,

Dimitrios Mallios,

Parth Pathak,

Faegheh Sardari,

Thomas Seidl,

Gedas Bertasius,

Mohsen Fayyaz,

Sunando Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hannan_2026_CVPR,
  author    = {Hannan, Tanveer and Mallios, Dimitrios and Pathak, Parth and Sardari, Faegheh and Seidl, Thomas and Bertasius, Gedas and Fayyaz, Mohsen and Sengupta, Sunando},
  title     = {{DocSLM}: A Small Vision-Language Model for Long Multimodal Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9337--9346},
}
OnlineX: Unified Online 3D Reconstruction and Understanding with Active-to-Stable State Evolution: Chong Xia,

Fangfu Liu,

Yule Wang,

Yize Pang,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Liu, Fangfu and Wang, Yule and Pang, Yize and Duan, Yueqi},
  title     = {{OnlineX}: Unified Online {3D} Reconstruction and Understanding with Active-to-Stable State Evolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {66--76},
}
Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting: Shizhao Gao,

Jun Li,

Qiming Li; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Shizhao and Li, Jun and Li, Qiming},
  title     = {Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8143--8152},
}
Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation: Yizhou Liu,

Dingkang Yang,

Zizhi Chen,

Minghao Han,

Xukun Zhang,

Keliang Liu,

Jingwei Wei,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is also used by other entries in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yizhou and Yang, Dingkang and Chen, Zizhi and Han, Minghao and Zhang, Xukun and Liu, Keliang and Wei, Jingwei and Zhang, Lihua},
  title     = {Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8651--8660},
}
AndroidLong: LLM-based Android Agents Struggle with Long Looping Tasks: Xinghan Liu,

Xiao Liu,

Yifan Xu,

Jiaqi Fu,

Jiayu Huang,

Yixuan Liu,

Yuxiao Dong,

Jie Tang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is also used by other entries in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinghan and Liu, Xiao and Xu, Yifan and Fu, Jiaqi and Huang, Jiayu and Liu, Yixuan and Dong, Yuxiao and Tang, Jie},
  title     = {{AndroidLong}: {LLM-based} {Android} Agents Struggle with Long Looping Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1700--1710},
}
Proto-SaGa: Prototype-based 3D Scene Segmentation with Semantic-aware Gaussian Grouping: Youngmin Oh,

Changjae Oh,

Bumsub Ham; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Youngmin and Oh, Changjae and Ham, Bumsub},
  title     = {{Proto-SaGa}: Prototype-based {3D} Scene Segmentation with Semantic-aware {Gaussian} Grouping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7493--7502},
}
Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in LVLMs: Yujin Jo,

Sangyoon Bae,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2026_CVPR,
  author    = {Jo, Yujin and Bae, Sangyoon and Kim, Taesup},
  title     = {Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in {LVLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9706--9715},
}
OmniMotion-X: Versatile Multimodal Whole-Body Motion Generation: Guowei Xu,

Yuxuan Bian,

Ailing Zeng,

Zhuo Chen,

Mingyi Shi,

Shaoli Huang,

Wen Li,

Lixin Duan,

Qiang Xu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xu_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guowei and Bian, Yuxuan and Zeng, Ailing and Chen, Zhuo and Shi, Mingyi and Huang, Shaoli and Li, Wen and Duan, Lixin and Xu, Qiang},
  title     = {{OmniMotion-X}: Versatile Multimodal Whole-Body Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3641--3652},
}
It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models: Jaeha Choi,

Jin Won Lee,

Siwoo You,

Jangho Lee; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Choi_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeha and Lee, Jin Won and You, Siwoo and Lee, Jangho},
  title     = {It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9500--9509},
}
CTFS : Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels: Ping Guo,

Chengzhou Li,

Guanchen Meng,

Qi Jia,

Jinyuan Liu,

Zhu Liu,

Yu Liu,

Zhongxuan Luo,

Xin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ping and Li, Chengzhou and Meng, Guanchen and Jia, Qi and Liu, Jinyuan and Liu, Zhu and Liu, Yu and Luo, Zhongxuan and Fan, Xin},
  title     = {{CTFS} : Collaborative Teacher Framework for Forward-Looking Sonar Image Semantic Segmentation with Extremely Limited Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1660--1669},
}
Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models: Shan Zhao,

Zhao Yang,

Tianwei Yan,

Yusong Gong,

Qian Wan,

Shizhao Chen,

Shezheng Song,

Chengyu Wang,

Meng Wang; [pdf]
[bibtex]
% NOTE(review): citation key Zhao_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Shan and Yang, Zhao and Yan, Tianwei and Gong, Yusong and Wan, Qian and Chen, Shizhao and Song, Shezheng and Wang, Chengyu and Wang, Meng},
  title     = {Seeing the Abstract: A Benchmark for Visual-Only Metaphor Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2304--2313},
}
STS-Mixer: Spatio-Temporal-Spectral Mixer for 4D Point Cloud Video Understanding: Wenhao Li,

Xueying Jiang,

Gongjie Zhang,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Jiang, Xueying and Zhang, Gongjie and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{STS-Mixer}: Spatio-Temporal-Spectral Mixer for {4D} Point Cloud Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8185--8194},
}
Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling: Xuhai Chen,

Zhi Cen,

Huaijin Pi,

Sida Peng,

Xiaowei Zhou,

Yong Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xuhai and Cen, Zhi and Pi, Huaijin and Peng, Sida and Zhou, Xiaowei and Liu, Yong},
  title     = {Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3598--3608},
}
VACoT: Rethinking Visual Data Augmentation with VLMs: Zhengzhuo Xu,

Chong Sun,

SiNan Du,

Chen Li,

Jing Lyu,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xu_2026_CVPR is also used by another entry in this listing; keys must be unique before this is used as a .bib file.
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengzhuo and Sun, Chong and Du, SiNan and Li, Chen and Lyu, Jing and Yuan, Chun},
  title     = {{VACoT}: Rethinking Visual Data Augmentation with {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9780--9790},
}
KnowMTP: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving: Rufan Bai,

Tianyi Xue,

Tiantian Zhou,

Weiwei Wu,

Changle Li,

Yuhuan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Rufan and Xue, Tianyi and Zhou, Tiantian and Wu, Weiwei and Li, Changle and Lu, Yuhuan},
  title     = {{KnowMTP}: A Knowledge-Guided Framework for Multi-Agent Trajectory Prediction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {980--989},
}
Block Cascading: Training Free Acceleration of Block-Causal Video Models: Hmrishav Bandyopadhyay,

Nikhil Pinnaparaju,

Rahim Entezari,

Jim Scott,

Yi-Zhe Song,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2026_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Pinnaparaju, Nikhil and Entezari, Rahim and Scott, Jim and Song, Yi-Zhe and Jampani, Varun},
  title     = {Block Cascading: Training Free Acceleration of Block-Causal Video Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4078--4088},
}
GRADE: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution: Zehong Ke,

Zhiyuan Liu,

Yuning Wang,

Jinhao Li,

Junkai Jiang,

Yanbo Jiang,

Zhenhua Xu,

Jianqiang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Zehong and Liu, Zhiyuan and Wang, Yuning and Li, Jinhao and Jiang, Junkai and Jiang, Yanbo and Xu, Zhenhua and Wang, Jianqiang},
  title     = {{GRADE}: Guiding Realistic Autonomous Driving with Adaptive Trajectory Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1029--1038},
}
Group Relative Attention Guidance for Image Editing: Xuanpu Zhang,

Xuesong Niu,

Ruidong Chen,

Dan Song,

Jianhao Zeng,

Penghui Du,

Haoxiang Cao,

Kai Wu,

An-an Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuanpu and Niu, Xuesong and Chen, Ruidong and Song, Dan and Zeng, Jianhao and Du, Penghui and Cao, Haoxiang and Wu, Kai and Liu, An-an},
  title     = {Group Relative Attention Guidance for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3840--3850},
}
Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding: Ziyang Wang,

Honglu Zhou,

Shijie Wang,

Junnan Li,

Caiming Xiong,

Silvio Savarese,

Mohit Bansal,

Michael S. Ryoo,

Juan Carlos Niebles; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhou, Honglu and Wang, Shijie and Li, Junnan and Xiong, Caiming and Savarese, Silvio and Bansal, Mohit and Ryoo, Michael S. and Niebles, Juan Carlos},
  title     = {Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9088--9099},
}
GeneFlow: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification: Yihang Wu,

Xianxu Hou,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yihang and Hou, Xianxu and Shen, Linlin},
  title     = {{GeneFlow}: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3688--3697},
}
EIRES:Training-free AI-Generated Image Detection via Edit-Induced Reconstruction Error Shift: Wan Jiang,

Jing Yan,

Xiaojing Chen,

Ling Shen,

Chenhao Lin,

Yunfeng Diao,

Richang Hong; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Wan and Yan, Jing and Chen, Xiaojing and Shen, Ling and Lin, Chenhao and Diao, Yunfeng and Hong, Richang},
  title     = {{EIRES}:Training-free {AI}-Generated Image Detection via Edit-Induced Reconstruction Error Shift},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6486--6495},
}
A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models: Duo Li,

Zuhao Yang,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Duo and Yang, Zuhao and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {A Comprehensive Study on Visual Token Redundancy for Discrete Diffusion-based Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2823--2833},
}
BrandFusion: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation: Zihao Zhu,

Ruotong Wang,

Siwei Lyu,

Min Zhang,

Baoyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zihao and Wang, Ruotong and Lyu, Siwei and Zhang, Min and Wu, Baoyuan},
  title     = {{BrandFusion}: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8661--8671},
}
PDF-GS: Progressive Distractor Filtering for Robust 3D Gaussian Splatting: Kangmin Seo,

MinKyu Lee,

Tae-Young Kim,

ByeongCheol Lee,

JoonSeoung An,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Kangmin and Lee, MinKyu and Kim, Tae-Young and Lee, ByeongCheol and An, JoonSeoung and Heo, Jae-Pil},
  title     = {{PDF-GS}: Progressive Distractor Filtering for Robust {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {468--477},
}
Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery: Bohan Zhang,

Weidong Tang,

Zhixiang Chi,

Yi Jin,

Zhenbo Li,

Yang Wang,

Yanan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bohan and Tang, Weidong and Chi, Zhixiang and Jin, Yi and Li, Zhenbo and Wang, Yang and Wu, Yanan},
  title     = {Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7830--7840},
}
Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model: Keli Deng,

Yuntao Qian; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Keli and Qian, Yuntao},
  title     = {Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4940--4949},
}
InternVL-X: Advancing and Accelerating InternVL Series with Efficient Visual Token Compression: Dongchen Lu,

Zilu Zhang,

Leping Huang,

Yuyao Sun,

Jianliang Zeng,

Mao Shu,

Huo Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Dongchen and Zhang, Zilu and Huang, Leping and Sun, Yuyao and Zeng, Jianliang and Shu, Mao and Cao, Huo},
  title     = {{InternVL-X}: Advancing and Accelerating {InternVL} Series with Efficient Visual Token Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5765--5775},
}
FineCog-Nav: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal UAV Navigation: Dian Shao,

Zhengzheng Xu,

Peiyang Wang,

Like Liu,

Yule Wang,

Jieqi Shi,

Jing Huo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Dian and Xu, Zhengzheng and Wang, Peiyang and Liu, Like and Wang, Yule and Shi, Jieqi and Huo, Jing},
  title     = {{FineCog-Nav}: Integrating Fine-grained Cognitive Modules for Zero-shot Multimodal {UAV} Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1325--1334},
}
Earthquake-Bench: Video Generation Benchmark for Earthquake Simulation: Lei Bao,

Hao Chen,

Yuyan Chen,

Kui Wu,

Lijia Chen,

Fangwei Zhong,

Feiran Huang,

Bo Song,

Han Yang; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Lei and Chen, Hao and Chen, Yuyan and Wu, Kui and Chen, Lijia and Zhong, Fangwei and Huang, Feiran and Song, Bo and Yang, Han},
  title     = {{Earthquake-Bench}: Video Generation Benchmark for Earthquake Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4055--4066},
}
Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks: Yu Yan,

Sheng Sun,

Shengjia Cheng,

Teli Liu,

Mingfeng Li,

Min Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yu and Sun, Sheng and Cheng, Shengjia and Liu, Teli and Li, Mingfeng and Liu, Min},
  title     = {Red-teaming the Multimodal Reasoning: Jailbreaking Vision-Language Models via Cross-modal Entanglement Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {837--846},
}
Quantifying the Gap between Understanding and Generation within Unified Multimodal Models: Chenlong Wang,

Yuhang Chen,

Zhihan Hu,

Dongping Chen,

Wenhu Chen,

Sarah Wiegreffe,

Tianyi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenlong and Chen, Yuhang and Hu, Zhihan and Chen, Dongping and Chen, Wenhu and Wiegreffe, Sarah and Zhou, Tianyi},
  title     = {Quantifying the Gap between Understanding and Generation within Unified Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5936--5946},
}
Plug-and-Think: Structured Reasoning for Vision-Language-Action Models: Kaikai Wei,

Di wen,

Xinhai Li,

Senwei Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Kaikai and wen, Di and Li, Xinhai and Xiang, Senwei},
  title     = {{Plug-and-Think}: Structured Reasoning for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3136--3145},
}
Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in MLLMs: Chenxi Zhao,

Yan Zhou,

Jufeng Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chenxi and Zhou, Yan and Yang, Jufeng},
  title     = {Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8897--8906},
}
Leveraging Arbitrary Data Sources for AI-Generated Image Detection Without Sacrificing Generalization: Qinghui He,

Haifeng Zhang,

Xiuli Bi,

Bo Liu,

Chi-Man Pun,

Bin Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Qinghui and Zhang, Haifeng and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Xiao, Bin},
  title     = {Leveraging Arbitrary Data Sources for {AI}-Generated Image Detection Without Sacrificing Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6748--6757},
}
ODOV: Benchmark the Open-Domain Open-Vocabulary Object Detection: Yupeng Zhang,

Ruize Han,

Fangnan Zhou,

Wei Feng,

Liang Wan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yupeng and Han, Ruize and Zhou, Fangnan and Feng, Wei and Wan, Liang},
  title     = {{ODOV}: Benchmark the Open-Domain Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6643--6654},
}
Latent-Compressed Variational Autoencoder for Video Diffusion Models: Jiarui Guan,

Wenshuai Zhao,

Zhengtao Zou,

Juho Kannala,

Arno Solin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Jiarui and Zhao, Wenshuai and Zou, Zhengtao and Kannala, Juho and Solin, Arno},
  title     = {Latent-Compressed Variational Autoencoder for Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3873--3883},
}
CaptAin: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval: Chuanshen Chen,

Kai Zhou,

Feiqi Wang,

Yutao Ning,

Zhendong Xiong,

Yirui Li,

Zhiquan Wen,

Mingkui Tan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chuanshen and Zhou, Kai and Wang, Feiqi and Ning, Yutao and Xiong, Zhendong and Li, Yirui and Wen, Zhiquan and Tan, Mingkui},
  title     = {{CaptAin}: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6208--6217},
}
Step-CoT: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering: Lin Fan,

Yafei Ou,

Zhipeng Deng,

Pengyu Dai,

Chongxian Hou,

Jiale Yan,

Yaqian Li,

Kaiwen Long,

Xun Gong,

Masayuki Ikebe,

Yefeng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Lin and Ou, Yafei and Deng, Zhipeng and Dai, Pengyu and Hou, Chongxian and Yan, Jiale and Li, Yaqian and Long, Kaiwen and Gong, Xun and Ikebe, Masayuki and Zheng, Yefeng},
  title     = {{Step-CoT}: Stepwise Visual Chain-of-Thought for Medical Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2049--2059},
}
NRFP: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation: Huanxin Zou,

Zhize Wu,

Yue Jiang,

Jijian Zhou,

Zhiwei Xu,

Teng Li,

Jianhua Shu,

Fan Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Huanxin and Wu, Zhize and Jiang, Yue and Zhou, Jijian and Xu, Zhiwei and Li, Teng and Shu, Jianhua and Cheng, Fan},
  title     = {{NRFP}: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7562--7572},
}
Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings: Fatemeh Akbarian,

Anahita Baninajjar,

Yingyi Zhang,

Ananth Balashankar,

Amir Aminifar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akbarian_2026_CVPR,
  author    = {Akbarian, Fatemeh and Baninajjar, Anahita and Zhang, Yingyi and Balashankar, Ananth and Aminifar, Amir},
  title     = {Breaking the Illusion: Consensus-Based Generative Mitigation of Adversarial Illusions in Multi-Modal Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {748--757},
}
LTGS: Long-Term Gaussian Scene Chronology From Sparse View Updates: Minkwan Kim,

Seungmin Lee,

Junho Kim,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minkwan and Lee, Seungmin and Kim, Junho and Kim, Young Min},
  title     = {{LTGS}: Long-Term {Gaussian} Scene Chronology From Sparse View Updates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {488--497},
}
From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness: Chenlin Fu,

Ao Gong,

Xingtao Ling,

Yingying Zhu; [pdf]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chenlin and Gong, Ao and Ling, Xingtao and Zhu, Yingying},
  title     = {From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7101--7110},
}
VideoMatGen: PBR Materials through Joint Generative Modeling: Jon Hasselgren,

Milos Hasan,

Zheng Zeng,

Jacob Munkberg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasselgren_2026_CVPR,
  author    = {Hasselgren, Jon and Hasan, Milos and Zeng, Zheng and Munkberg, Jacob},
  title     = {{VideoMatGen}: {PBR} Materials through Joint Generative Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2440--2450},
}
ReFoCUS: Reinforcement-guided Frame Optimization for Contextual Understanding: Hosu Lee,

Junho Kim,

Hyunjun Kim,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Hosu and Kim, Junho and Kim, Hyunjun and Ro, Yong Man},
  title     = {{ReFoCUS}: Reinforcement-guided Frame Optimization for Contextual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8291--8302},
}
Environmental Understanding Vision-language Model for Embodied Agent: Jinsik Bang,

Jaeyeon Bae,

Donggyu Lee,

Siyeol Jung,

Taehwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2026_CVPR,
  author    = {Bang, Jinsik and Bae, Jaeyeon and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
  title     = {Environmental Understanding Vision-language Model for Embodied Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3092--3102},
}
Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints: Jungkon Kim,

Cheolseung Jung,

Jong-Min Choi,

Juseong Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jungkon and Jung, Cheolseung and Choi, Jong-Min and Lee, Juseong},
  title     = {Phantom: A Unified Face-Swap Deepfake Protection Framework with Latent and Spatial Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {847--856},
}
VSAS-Bench: Real-Time Evaluation of Visual Streaming Assistant Models: Pavan Kumar Anasosalu Vasu,

Cem Koc,

Fartash Faghri,

Chun-Liang Li,

Bo Feng,

Zhengfeng Lai,

Meng Cao,

Oncel Tuzel,

Hadi Pouransari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasu_2026_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Koc, Cem and Faghri, Fartash and Li, Chun-Liang and Feng, Bo and Lai, Zhengfeng and Cao, Meng and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{VSAS-Bench}: Real-Time Evaluation of Visual Streaming Assistant Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9654--9663},
}
Learning Vision-Language-Action World Models for Autonomous Driving: Guoqing Wang,

Pin Tang,

Xiangxuan Ren,

Guodongfang Zhao,

Bailan Feng,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guoqing and Tang, Pin and Ren, Xiangxuan and Zhao, Guodongfang and Feng, Bailan and Ma, Chao},
  title     = {Learning Vision-Language-Action World Models for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1073--1084},
}
Switch-KD: Visual-Switch Knowledge Distillation for Vision-Language Models: Haoyi Sun,

Xiaoxiao Wang,

Ning Mao,

Qian Wang,

Lifu Mu,

Wen Zheng,

Tao Wei,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoyi and Wang, Xiaoxiao and Mao, Ning and Wang, Qian and Mu, Lifu and Zheng, Wen and Wei, Tao and Chen, Wei},
  title     = {{Switch-KD}: Visual-Switch Knowledge Distillation for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9316--9326},
}
DeepSketcher: Internalizing Visual Manipulation for Multimodal Reasoning: Chi Zhang,

Haibo Qiu,

Qiming Zhang,

Zhixiong Zeng,

Lin Ma,

Jing Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Zeng, Zhixiong and Ma, Lin and Zhang, Jing},
  title     = {{DeepSketcher}: Internalizing Visual Manipulation for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9389--9400},
}
FLAIR: Frequency- and Locality-Aware Implicit Neural Representations: Sukhun Ko,

Seokhyun Youn,

Dahyeon Kye,

Kyle Min,

Chanho Eom,

Jihyong Oh; [pdf] [supp]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Sukhun and Youn, Seokhyun and Kye, Dahyeon and Min, Kyle and Eom, Chanho and Oh, Jihyong},
  title     = {{FLAIR}: Frequency- and Locality-Aware Implicit Neural Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4877--4887},
}
Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection: Sanghoon Lee,

Geon Lee,

Hyekang Park,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sanghoon and Lee, Geon and Park, Hyekang and Ham, Bumsub},
  title     = {Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6819--6828},
}
Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation: Runqi Wang,

Chuming Wang,

Fangqiu Yi,

Yuying Zhao,

Jingyu Xu,

Yuhang Dai,

Zheng Wang,

Chi Zhang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Runqi and Wang, Chuming and Yi, Fangqiu and Zhao, Yuying and Xu, Jingyu and Dai, Yuhang and Wang, Zheng and Zhang, Chi},
  title     = {Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3903--3913},
}
Exploring the best way for UAV visual localization under Low-altitude Multi-view Observation Condition: a Benchmark: Yibin Ye,

Xichao Teng,

Shuo Chen,

Leqi Liu,

Kun Wang,

Xiaokai Song,

Zhang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yibin and Teng, Xichao and Chen, Shuo and Liu, Leqi and Wang, Kun and Song, Xiaokai and Li, Zhang},
  title     = {Exploring the best way for {UAV} visual localization under Low-altitude Multi-view Observation Condition: a Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1731--1741},
}
Stochastic Perturbations Improve Distribution-to-Distribution Generative Models: Shiye Su,

Yuhui Zhang,

Linqi Zhou,

Rajesh Ranganath,

Serena Yeung-Levy; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Shiye and Zhang, Yuhui and Zhou, Linqi and Ranganath, Rajesh and Yeung-Levy, Serena},
  title     = {Stochastic Perturbations Improve Distribution-to-Distribution Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3965--3974},
}
Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction: Hanzhong Guo,

Yizhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhong and Yu, Yizhou},
  title     = {Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3799--3808},
}
BMD-45: A Large-Scale CCTV Vehicle Detection Dataset for Urban Traffic in Developing Cities: Akash Sharma,

Chinmay Mhatre,

Sankalp Gawali,

Ruthvik Bokkasam,

Brij Sharma,

Vishwajeet Pattanaik,

Punit Rathore,

Raghu Krishnapuram,

Vijay Gopal Kovvali,

Anirban Chakraborty,

Yogesh Simmhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Akash and Mhatre, Chinmay and Gawali, Sankalp and Bokkasam, Ruthvik and Sharma, Brij and Pattanaik, Vishwajeet and Rathore, Punit and Krishnapuram, Raghu and Kovvali, Vijay Gopal and Chakraborty, Anirban and Simmhan, Yogesh},
  title     = {{BMD-45}: A Large-Scale {CCTV} Vehicle Detection Dataset for Urban Traffic in Developing Cities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2240--2249},
}
Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios: Xuekang Zhu,

Ji-Zhe Zhou,

Kaiwen Feng,

Chenfan Qu,

Xiwen Wang,

Yunfei Wang,

Liting Zhou,

Jian Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xuekang and Zhou, Ji-Zhe and Feng, Kaiwen and Qu, Chenfan and Wang, Xiwen and Wang, Yunfei and Zhou, Liting and Liu, Jian},
  title     = {Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7198--7207},
}
Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention: Wenhu Zhang,

Yiming Wu,

Huanyu Wang,

YaoYang Liu,

Huanzhang Dou,

Senqiao Yang,

Sitong Wu,

Hanbin Zhao,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenhu and Wu, Yiming and Wang, Huanyu and Liu, YaoYang and Dou, Huanzhang and Yang, Senqiao and Wu, Sitong and Zhao, Hanbin and Jia, Jiaya},
  title     = {Efficient Long-Context Modeling in Diffusion Language Models via Block Approximate Sparse Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2852--2862},
}
MIRA: Multimodal Iterative Reasoning Agent for Image Editing: Ziyun Zeng,

Hang Hua,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Ziyun and Hua, Hang and Luo, Jiebo},
  title     = {{MIRA}: Multimodal Iterative Reasoning Agent for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9563--9573},
}
SAGE: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation: Gia Huy Thai,

Hoang-Nguyen Vu,

Anh-Minh Phan,

Quang-Thinh Ly,

Thi-Ngoc-Truc Nguyen,

Nhat Ho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thai_2026_CVPR,
  author    = {Thai, Gia Huy and Vu, Hoang-Nguyen and Phan, Anh-Minh and Ly, Quang-Thinh and Nguyen, Thi-Ngoc-Truc and Ho, Nhat},
  title     = {{SAGE}: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7337--7346},
}
HyperFM: A Efficient Hyperspectral Foundation Model with Spectral Grouping: Zahid Hassan Tushar,

Sanjay Purushotham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tushar_2026_CVPR,
  author    = {Tushar, Zahid Hassan and Purushotham, Sanjay},
  title     = {{HyperFM}: A Efficient Hyperspectral Foundation Model with Spectral Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6955--6965},
}
Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training: Yanyun Wang,

Qingqing Ye,

Li Liu,

Zi Liang,

Haibo Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yanyun and Ye, Qingqing and Liu, Li and Liang, Zi and Hu, Haibo},
  title     = {Robust Alignment: Harmonizing Clean Accuracy and Adversarial Robustness in Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {768--778},
}
DSAA: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection: Donghong Jiang,

Endian Lin,

Hanqing Liu,

Mingjie Liu,

Luoping Cui,

Zhao Yang,

Chuang Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Donghong and Lin, Endian and Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Yang, Zhao and Zhu, Chuang},
  title     = {{DSAA}: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6538--6547},
}
Mitigating Object Hallucinations in LVLMs via Attention Imbalance Rectification: Han Sun,

Qin Li,

Peixin Wang,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Han and Li, Qin and Wang, Peixin and Zhang, Min},
  title     = {Mitigating Object Hallucinations in {LVLMs} via Attention Imbalance Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8930--8940},
}
Memorization in 3D Shape Generation: An Empirical Study: Shu Pu,

Boya Zeng,

Kaichen Zhou,

Mengyu Wang,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2026_CVPR,
  author    = {Pu, Shu and Zeng, Boya and Zhou, Kaichen and Wang, Mengyu and Liu, Zhuang},
  title     = {Memorization in {3D} Shape Generation: An Empirical Study},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1828--1838},
}
FusionBridge: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification: Yali Li,

Qianru Han,

Xinwei He,

Zhi Liu,

Jinhai Xiang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yali and Han, Qianru and He, Xinwei and Liu, Zhi and Xiang, Jinhai},
  title     = {{FusionBridge}: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5735--5744},
}
Hold-One-Shot-Out (HOSO) for Validation-Free Few-Shot CLIP Adapters: Chris Vorster,

Mayug Maniparambil,

Noel O'Connor,

Noel Murphy,

Derek Molloy; [pdf] [supp]
[bibtex]
@InProceedings{Vorster_2026_CVPR,
  author    = {Vorster, Chris and Maniparambil, Mayug and O'Connor, Noel and Murphy, Noel and Molloy, Derek},
  title     = {Hold-One-Shot-Out ({HOSO}) for Validation-Free Few-Shot {CLIP} Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7820--7829},
}
Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks: Shijie Lian,

Changti Wu,

Laurence Tianruo Yang,

Hang Yuan,

Bin Yu,

Lei Zhang,

Kai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shijie and Wu, Changti and Yang, Laurence Tianruo and Yuan, Hang and Yu, Bin and Zhang, Lei and Chen, Kai},
  title     = {Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9824--9835},
}
Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings: Luca Parolari,

Nicla Faccioli,

Lamberto Ballan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parolari_2026_CVPR,
  author    = {Parolari, Luca and Faccioli, Nicla and Ballan, Lamberto},
  title     = {Benchmarking Layout-Guided Diffusion Models through Unified Semantic-Spatial Evaluation in Closed and Open Settings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1742--1751},
}
LWTformer: A Detail-Aware, Learnable Wavelet-Transformer for Ancient Chinese Character Image Restoration: Wentao Ruan,

Xinhui Li,

Zhan Cheng,

Cunhang Fan,

Libao Tian,

Zhao Lv; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Wentao and Li, Xinhui and Cheng, Zhan and Fan, Cunhang and Tian, Libao and Lv, Zhao},
  title     = {{LWTformer}: A Detail-Aware, Learnable Wavelet-Transformer for Ancient {Chinese} Character Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4971--4981},
}
OmniInsert: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models: Jinshu Chen,

Xinghui Li,

Xu Bai,

Tianxiang Ma,

Pengze Zhang,

Mengtian Li,

Zhuowei Chen,

Gen Li,

Lijie Liu,

Songtao Zhao,

Bingchuan Li,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jinshu and Li, Xinghui and Bai, Xu and Ma, Tianxiang and Zhang, Pengze and Li, Mengtian and Chen, Zhuowei and Li, Gen and Liu, Lijie and Zhao, Songtao and Li, Bingchuan and He, Qian},
  title     = {{OmniInsert}: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4067--4077},
}
DaMN: Deleting and Migrating Normalization Layers from Transformers: Alexey Ryabykin,

Irina Zhelavskaya,

Egor Shvetsov,

Alexey Rukhovich,

Nikita Okhotnikov,

Artem Khrapov,

Evgeny Burnaev,

Vladimir Mikhailovich Kryzhanovskiy; [pdf] [supp]
[bibtex]
@InProceedings{Ryabykin_2026_CVPR,
  author    = {Ryabykin, Alexey and Zhelavskaya, Irina and Shvetsov, Egor and Rukhovich, Alexey and Okhotnikov, Nikita and Khrapov, Artem and Burnaev, Evgeny and Kryzhanovskiy, Vladimir Mikhailovich},
  title     = {{DaMN}: Deleting and Migrating Normalization Layers from Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2883--2892},
}
PASR: Pose-Aware 3D Shape Retrieval from Occluded Single Views: Jiaxin Shi,

Guofeng Zhang,

Wufei Ma,

Naifu Liang,

Adam Kortylewski,

Alan Yuille; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Jiaxin and Zhang, Guofeng and Ma, Wufei and Liang, Naifu and Kortylewski, Adam and Yuille, Alan},
  title     = {{PASR}: Pose-Aware {3D} Shape Retrieval from Occluded Single Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6869--6879},
}
MHMamba: Multi-Head Mamba for 3D Brain Tumor Segmentation: Hanjun Tao,

Hua Wang,

Fan Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Hanjun and Wang, Hua and Zhang, Fan},
  title     = {{MHMamba}: Multi-Head {Mamba} for {3D} Brain Tumor Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7388--7397},
}
HoliSafe: Holistic Safety Benchmarking and Modeling for Vision-Language Model: Youngwan Lee,

Kangsan Kim,

Kwanyong Park,

Ilchae Jung,

Soojin Jang,

Seanie Lee,

Yong-Ju Lee,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Youngwan and Kim, Kangsan and Park, Kwanyong and Jung, Ilchae and Jang, Soojin and Lee, Seanie and Lee, Yong-Ju and Hwang, Sung Ju},
  title     = {{HoliSafe}: Holistic Safety Benchmarking and Modeling for Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5989--5998},
}
Do LLMs and VLMs Share Reasoning Neurons? Evidence and Mechanisms of Cross-Modal Transfer: Chenhang Cui,

An Zhang,

Yuxin Chen,

Gelei Deng,

Jingnan Zheng,

Zhenkai Liang,

Xiang Wang,

Tat-Seng Chua; [pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Chenhang and Zhang, An and Chen, Yuxin and Deng, Gelei and Zheng, Jingnan and Liang, Zhenkai and Wang, Xiang and Chua, Tat-Seng},
  title     = {Do {LLMs} and {VLMs} Share Reasoning Neurons? {Evidence} and Mechanisms of Cross-Modal Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2346--2356},
}
OV-Stitcher: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation: Seungjae Moon,

Seunghyun Oh,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Seungjae and Oh, Seunghyun and Ro, Youngmin},
  title     = {{OV-Stitcher}: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7357--7367},
}
BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices: Mincheol Kang,

HyunJin Lim,

Bomin Kang,

Daehee Park; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Mincheol and Lim, HyunJin and Kang, Bomin and Park, Daehee},
  title     = {{BitTP}: The Lightweight Trajectory Prediction Model with {BitLLM} for Edge-Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3470--3480},
}
TP^2-DETR: Unlocking Deformable DETR for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids: Ya-Yun Cheng,

Kan Tippayamontri,

Chih-Yuan Yang,

Jane Yung-jen Hsu; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Ya-Yun and Tippayamontri, Kan and Yang, Chih-Yuan and Hsu, Jane Yung-jen},
  title     = {{TP$^2$-DETR}: Unlocking Deformable {DETR} for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8237--8246},
}
ForestPrune: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling: Shaobo Ju,

Baiyang Song,

Tao Chen,

Jiapeng Zhang,

Qiong Wu,

Chao Chang,

Huaixi Wang,

Yiyi Zhou,

Rongrong Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Ju_2026_CVPR,
  author    = {Ju, Shaobo and Song, Baiyang and Chen, Tao and Zhang, Jiapeng and Wu, Qiong and Chang, Chao and Wang, Huaixi and Zhou, Yiyi and Ji, Rongrong},
  title     = {{ForestPrune}: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8326--8336},
}
Re-Depth Anything: Test-Time Depth Refinement via Self-Supervised Re-lighting: Ananta R. Bhattarai,

Helge Rhodin; [pdf] [supp]
[bibtex]
@InProceedings{Bhattarai_2026_CVPR,
  author    = {Bhattarai, Ananta R. and Rhodin, Helge},
  title     = {{Re-Depth Anything}: Test-Time Depth Refinement via Self-Supervised Re-lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {602--612},
}
TransKV: A Data-Driven Pruning Method for Large Foundation Models: Guangning Xu,

Fanxu Meng,

Ruijie Zhou,

Michael K Ng,

Wenjie Pei,

Muhan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangning and Meng, Fanxu and Zhou, Ruijie and Ng, Michael K. and Pei, Wenjie and Zhang, Muhan},
  title     = {{TransKV}: A Data-Driven Pruning Method for Large Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2451--2461},
}
CarePilot: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare: Akash Ghosh,

Tajamul Ashraf,

Rishu Kumar Singh,

Numan Saeed,

Sriparna Saha,

Xiuying Chen,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Akash and Ashraf, Tajamul and Singh, Rishu Kumar and Saeed, Numan and Saha, Sriparna and Chen, Xiuying and Khan, Salman},
  title     = {{CarePilot}: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9695--9705},
}
Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition: Yang Chen,

Miaoge Li,

Zhijie Rao,

Deze Zeng,

Song Guo,

Jingcai Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yang and Li, Miaoge and Rao, Zhijie and Zeng, Deze and Guo, Song and Guo, Jingcai},
  title     = {Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3374--3383},
}
InstaDA: Augmenting Instance Segmentation Data with Dual-Agent System: Xianbao Hou,

Yonghao He,

Zeyd Boukhers,

John See,

Hu Su,

Wei Sui,

Cong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xianbao and He, Yonghao and Boukhers, Zeyd and See, John and Su, Hu and Sui, Wei and Yang, Cong},
  title     = {{InstaDA}: Augmenting Instance Segmentation Data with Dual-Agent System},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4300--4309},
}
MaMe: Matrix-Based Token Merging: Simin Huo,

Ning Li; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Simin and Li, Ning},
  title     = {{MaMe}: Matrix-Based Token Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2863--2872},
}
BadRSSD: Backdoor Attacks on Regularized Self-Supervised Diffusion Models: Jiayao Wang,

Yiping Zhang,

Mohammad Maruf Hasan,

Xiaoying Lei,

Jiale Zhang,

Junwu Zhu,

Qilin Wu,

Dongfang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayao and Zhang, Yiping and Hasan, Mohammad Maruf and Lei, Xiaoying and Zhang, Jiale and Zhu, Junwu and Wu, Qilin and Zhao, Dongfang},
  title     = {{BadRSSD}: Backdoor Attacks on Regularized Self-Supervised Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {705--715},
}
Splatwizard: A Benchmark Toolkit for 3D Gaussian Splatting Compression: Xiang Liu,

Yimin Zhou,

Jinxiang Wang,

Yujun Huang,

Shuzhao Xie,

Shiyu Qin,

Mingyao Hong,

Jiawei Li,

Yaowei Wang,

Zhi Wang,

Shu-Tao Xia,

Bin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiang and Zhou, Yimin and Wang, Jinxiang and Huang, Yujun and Xie, Shuzhao and Qin, Shiyu and Hong, Mingyao and Li, Jiawei and Wang, Yaowei and Wang, Zhi and Xia, Shu-Tao and Chen, Bin},
  title     = {Splatwizard: A Benchmark Toolkit for {3D} {Gaussian} Splatting Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2261--2271},
}
CP-IMoE: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning: Jing Li,

Dongbo Zhang,

Yalin Zheng,

Yanda Meng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jing and Zhang, Dongbo and Zheng, Yalin and Meng, Yanda},
  title     = {{CP-IMoE}: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6090--6099},
}
CurrMix: Curriculum-Enhanced MixUp for Long-Tailed Visual Recognition: Zhongquan Jian,

Yanhao Chen,

Bingbing Hu,

Wenhan Lv,

Shaopan Wang,

Jipeng Wu,

Junfeng Yao,

Yang Lu,

Qingqiang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Jian_2026_CVPR,
  author    = {Jian, Zhongquan and Chen, Yanhao and Hu, Bingbing and Lv, Wenhan and Wang, Shaopan and Wu, Jipeng and Yao, Junfeng and Lu, Yang and Wu, Qingqiang},
  title     = {{CurrMix}: Curriculum-Enhanced {MixUp} for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7706--7716},
}
B-MoE: A Body-Part-Aware Mixture-of-Experts "All Parts Matter" Approach to Micro-Action Recognition: Nishit Poddar,

Aglind Reka,

Diana-Laura Borza,

Snehashis Majhi,

Michal Balazia,

Abhijit Das,

François Brémond; [pdf] [arXiv]
[bibtex]
@InProceedings{Poddar_2026_CVPR,
  author    = {Poddar, Nishit and Reka, Aglind and Borza, Diana-Laura and Majhi, Snehashis and Balazia, Michal and Das, Abhijit and Br{\'e}mond, Fran{\c{c}}ois},
  title     = {{B-MoE}: A Body-Part-Aware Mixture-of-Experts ``All Parts Matter'' Approach to Micro-Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3364--3373},
}
See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models: Le Thien Phuc Nguyen,

Zhuoran Yu,

Samuel Low Yu Hang,

Subin An,

Jeongik Lee,

Yohan Ban,

SeungEun Chung,

Thanh-Huy Nguyen,

JuWan Maeng,

Soochahn Lee,

Yong Jae Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Le Thien Phuc and Yu, Zhuoran and Hang, Samuel Low Yu and An, Subin and Lee, Jeongik and Ban, Yohan and Chung, SeungEun and Nguyen, Thanh-Huy and Maeng, JuWan and Lee, Soochahn and Lee, Yong Jae},
  title     = {See, Hear, and Understand: Benchmarking Audiovisual Human Speech Understanding in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2272--2283},
}
Object-Centric Vision Token Pruning for Vision Language Models: Guangyuan Li,

Rongzhen Zhao,

Jinhong Deng,

Yanbo Wang,

Joni Pajarinen; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangyuan and Zhao, Rongzhen and Deng, Jinhong and Wang, Yanbo and Pajarinen, Joni},
  title     = {Object-Centric Vision Token Pruning for Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7040--7049},
}
M^4Fuse: Lightweight State-Space MoE with a Cross-Scale Gating Bridge for Brain Tumor Segmentation: Meihua Zhou,

Xinyu Tong,

Li Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Meihua and Tong, Xinyu and Yang, Li},
  title     = {{M$^4$Fuse}: Lightweight State-Space {MoE} with a Cross-Scale Gating Bridge for Brain Tumor Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5368--5377},
}
Understanding Reward Hacking in Text-to-Image Reinforcement Learning: Yunqi Hong,

Kuei-Chun Kao,

Hengguang Zhou,

Cho-Jui Hsieh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Yunqi and Kao, Kuei-Chun and Zhou, Hengguang and Hsieh, Cho-Jui},
  title     = {Understanding Reward Hacking in Text-to-Image Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4245--4255},
}
SpHOR: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks: Thiru Thillai Nadarasar Bahavan,

Sachith Seneviratne,

Saman Halgamuge; [pdf] [supp]
[bibtex]
@InProceedings{Bahavan_2026_CVPR,
  author    = {Bahavan, Thiru Thillai Nadarasar and Seneviratne, Sachith and Halgamuge, Saman},
  title     = {{SpHOR}: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6901--6910},
}
POS-ISP: Pipeline Optimization at the Sequence Level for Task-aware ISP: Jiyun Won,

Heemin Yang,

Woohyeok Kim,

Jungseul Ok,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Won_2026_CVPR,
  author    = {Won, Jiyun and Yang, Heemin and Kim, Woohyeok and Ok, Jungseul and Cho, Sunghyun},
  title     = {{POS-ISP}: Pipeline Optimization at the Sequence Level for Task-aware {ISP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4931--4939},
}
RACE-6D: Real-time Accurate Coarse-to-finE Object 6D Pose Transformer: Yoonwoo Ha,

Hyungpil Moon; [pdf] [supp]
[bibtex]
@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Yoonwoo and Moon, Hyungpil},
  title     = {{RACE-6D}: Real-time Accurate {Coarse-to-finE} Object {6D} Pose Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1525--1534},
}
GM-Skip: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models: Lianming Huang,

Haibo Hu,

Qiao Li,

Xin He,

Nan Guan,

Chun Jason Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Lianming and Hu, Haibo and Li, Qiao and He, Xin and Guan, Nan and Xue, Chun Jason},
  title     = {{GM-Skip}: Metric-Guided Transformer Block Skipping for Efficient Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2834--2843},
}
coDrawAgents: A Multi-Agent Dialogue Framework for Compositional Image Generation: Chunhan Li,

Qifeng Wu,

Jia-Hui Pan,

Ka-Hei Hui,

Jingyu Hu,

Yuming Jiang,

Bin Sheng,

Xihui Liu,

Wenjuan Gong,

Zhengzhe Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunhan and Wu, Qifeng and Pan, Jia-Hui and Hui, Ka-Hei and Hu, Jingyu and Jiang, Yuming and Sheng, Bin and Liu, Xihui and Gong, Wenjuan and Liu, Zhengzhe},
  title     = {{coDrawAgents}: A Multi-Agent Dialogue Framework for Compositional Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9802--9812},
}
Res2SPDNet: Multi-Granularity SPD Matrix Residual Learning for Signal Classification: Shenghui Yue,

Rui Wang,

Tianyang Xu,

Tao Zhou,

Xiao-Jun Wu,

Josef Kittler; [pdf]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Shenghui and Wang, Rui and Xu, Tianyang and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef},
  title     = {{Res2SPDNet}: Multi-Granularity {SPD} Matrix Residual Learning for Signal Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2639--2648},
}
Memorization In Stable Diffusion Is Unexpectedly Driven by CLIP Embeddings: Bumjun Kim,

Albert No; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Bumjun and No, Albert},
  title     = {Memorization In {Stable Diffusion} Is Unexpectedly Driven by {CLIP} Embeddings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7988--7997},
}
Recursive Think-Answer Process for LLMs and VLMs: Byung-Kwan Lee,

Youngchae Chee,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Byung-Kwan and Chee, Youngchae and Ro, Yong Man},
  title     = {Recursive Think-Answer Process for {LLMs} and {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9608--9621},
}
DUALVISION: RGB-Infrared Multimodal Large Language Models for Robust Visual Reasoning: Abrar Majeedi,

Zhiyuan Ruan,

Ziyi Zhao,

Hongcheng Wang,

Jianglin Lu,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Majeedi_2026_CVPR,
  author    = {Majeedi, Abrar and Ruan, Zhiyuan and Zhao, Ziyi and Wang, Hongcheng and Lu, Jianglin and Li, Yin},
  title     = {{DUALVISION}: {RGB}-Infrared Multimodal Large Language Models for Robust Visual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5786--5795},
}
What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters: Shaobo Liu,

Haobo Xiong,

Kai Liu,

Yuna Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shaobo and Xiong, Haobo and Liu, Kai and Lin, Yuna},
  title     = {What and Where to Adapt: Structure-Semantics Co-Tuning for Machine Vision Compression via Synergistic Adapters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2813--2822},
}
FedOrtho: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning: Qinghui Gong,

Xue Yang,

Xunlei Chen,

Jinshan Lai,

Hua Meng,

Xiaohu Tang; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Qinghui and Yang, Xue and Chen, Xunlei and Lai, Jinshan and Meng, Hua and Tang, Xiaohu},
  title     = {{FedOrtho}: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8009--8018},
}
S3O: Selective Spatial-Spectral Operator for Cross-Scale Fusion: Jieyuan Pei,

Wei Li,

Zhuoxuan Li,

Junwei Zhu,

Meiyi Lu,

Jiawei Jiang,

Chenyu Wang,

Jianwei Zheng; [pdf]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Jieyuan and Li, Wei and Li, Zhuoxuan and Zhu, Junwei and Lu, Meiyi and Jiang, Jiawei and Wang, Chenyu and Zheng, Jianwei},
  title     = {{S3O}: Selective Spatial-Spectral Operator for Cross-Scale Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6281--6290},
}
HiStream: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming: Haonan Qiu,

Shikun Liu,

Zijian Zhou,

Zhaochong An,

Weiming Ren,

Zhiheng Liu,

Jonas Schult,

Sen He,

Shoufa Chen,

Yuren Cong,

Tao Xiang,

Ziwei Liu,

Juan-Manuel Perez-Rua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Haonan and Liu, Shikun and Zhou, Zijian and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Schult, Jonas and He, Sen and Chen, Shoufa and Cong, Yuren and Xiang, Tao and Liu, Ziwei and Perez-Rua, Juan-Manuel},
  title     = {{HiStream}: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4603--4613},
}
EpiMask: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching: Rahul Deshmukh,

Aditya Chauhan,

Avinash Kak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deshmukh_2026_CVPR,
  author    = {Deshmukh, Rahul and Chauhan, Aditya and Kak, Avinash},
  title     = {{EpiMask}: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6271--6280},
}
AdaGaR: Adaptive Gabor Representation for Dynamic Scene Reconstruction: Jiewen Chan,

Zhenjun Zhao,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chan_2026_CVPR,
  author    = {Chan, Jiewen and Zhao, Zhenjun and Liu, Yu-Lun},
  title     = {{AdaGaR}: Adaptive {Gabor} Representation for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4276--4289},
}
VoQA: Visual-only Question Answering: Jianing An,

Luyang Jiang,

Jie Luo,

Wenjun Wu,

Lei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Jianing and Jiang, Luyang and Luo, Jie and Wu, Wenjun and Huang, Lei},
  title     = {{VoQA}: Visual-only Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9100--9109},
}
IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment: Zichen Zhu,

Yuheng Sun,

Mingxuan Zhu,

Wenjie Ma,

Situo Zhang,

Zhexiang Wang,

Ziyue Yang,

Danyang Zhang,

Kunyao Lan,

Zihan Zhao,

Dingye Liu,

Siqi Xiang,

Lu Chen,

Kai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zichen and Sun, Yuheng and Zhu, Mingxuan and Ma, Wenjie and Zhang, Situo and Wang, Zhexiang and Yang, Ziyue and Zhang, Danyang and Lan, Kunyao and Zhao, Zihan and Liu, Dingye and Xiang, Siqi and Chen, Lu and Yu, Kai},
  title     = {{IEA}: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8672--8683},
}
RoboScape-R: Unified Reward-Observation World Models for Generalizable Robotics Training via RL: Yinzhou Tang,

Yu Shang,

Yinuo Chen,

Bingwen Wei,

Xin Zhang,

Shu'ang Yu,

Liangzhi Shi,

Chao Yu,

Chen Gao,

Wei Wu,

Yong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yinzhou and Shang, Yu and Chen, Yinuo and Wei, Bingwen and Zhang, Xin and Yu, Shu'ang and Shi, Liangzhi and Yu, Chao and Gao, Chen and Wu, Wei and Li, Yong},
  title     = {{RoboScape-R}: Unified Reward-Observation World Models for Generalizable Robotics Training via {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1472--1481},
}
MeMix: Multi-Encoder Mixture Framework for Medical Report Generation: Yiming Cao,

Lizhen Cui,

Zhiqi Shen; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yiming and Cui, Lizhen and Shen, Zhiqi},
  title     = {{MeMix}: Multi-Encoder Mixture Framework for Medical Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5474--5483},
}
PaM-MIL: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images: Pengyu Guo,

Jiachuan Wang,

Zhao CHEN,

Caleb Chen Cao,

Liping Wang,

Tingyi Jiang,

Lei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Pengyu and Wang, Jiachuan and Chen, Zhao and Cao, Caleb Chen and Wang, Liping and Jiang, Tingyi and Chen, Lei},
  title     = {{PaM-MIL}: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5305--5314},
}
IRL-VLA: Vision-Language-Action Training via Reward World Model: Anqing Jiang,

Gao Yu,

Heng Yuwen,

Yiru Wang,

Wang Shuo,

Jiang Hao,

Sun Hao; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Anqing and Yu, Gao and Yuwen, Heng and Wang, Yiru and Shuo, Wang and Hao, Jiang and Hao, Sun},
  title     = {{IRL-VLA}: Vision-Language-Action Training via Reward World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {970--979},
}
SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models: Quentin Guimard,

Federico Bartsch,

Simone Caldarella,

Rahaf Aljundi,

Elisa Ricci,

Massimiliano Mancini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guimard_2026_CVPR,
  author    = {Guimard, Quentin and Bartsch, Federico and Caldarella, Simone and Aljundi, Rahaf and Ricci, Elisa and Mancini, Massimiliano},
  title     = {{SEM}: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8101--8110},
}
Softmax-GS: Generalized Gaussians Learning When to Blend or Bound: Chen Ziwen,

Peng Wang,

Hao Tan,

Zexiang Xu,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Wang, Peng and Tan, Hao and Xu, Zexiang and Fuxin, Li},
  title     = {{Softmax-GS}: Generalized {Gaussians} Learning When to Blend or Bound},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {508--517},
}
On the Feasibility and Opportunity of Autoregressive 3D Object Detection: Zanming Huang,

Jinsu Yoo,

Sooyoung Jeon,

Zhenzhen Liu,

Mark Campbell,

Kilian Q Weinberger,

Bharath Hariharan,

Wei-Lun Chao,

Katie Z Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zanming and Yoo, Jinsu and Jeon, Sooyoung and Liu, Zhenzhen and Campbell, Mark and Weinberger, Kilian Q. and Hariharan, Bharath and Chao, Wei-Lun and Luo, Katie Z.},
  title     = {On the Feasibility and Opportunity of Autoregressive {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1170--1179},
}
LUMINA: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction: Jiazhou Lin,

Zhongyi Liu,

Ying Shi,

Zhichun Zhao,

Zhuoyu Wang,

Yuhang Zhou,

Huanling Hu,

Guangnan Ye,

Mengtian Li,

Lei Guo; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiazhou and Liu, Zhongyi and Shi, Ying and Zhao, Zhichun and Wang, Zhuoyu and Zhou, Yuhang and Hu, Huanling and Ye, Guangnan and Li, Mengtian and Guo, Lei},
  title     = {{LUMINA}: Learning and Understanding of Multimodal Information for Narrative and Affect-based Virality Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1629--1639},
}
Indexing Multimodal Language Models for Large-scale Image Retrieval: Bahey Tharwat,

Giorgos Kordopatis-Zilos,

Pavel Suma,

Ian Reid,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Tharwat_2026_CVPR,
  author    = {Tharwat, Bahey and Kordopatis-Zilos, Giorgos and Suma, Pavel and Reid, Ian and Tolias, Giorgos},
  title     = {Indexing Multimodal Language Models for Large-scale Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6737--6747}
}
MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models: Xiyang Wu,

Zongxia Li,

Jihui Jin,

Gouthaman KV,

Vishnu Raj,

Nilotpal Sinha,

Jingxi Chen,

Fan Du,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiyang and Li, Zongxia and Jin, Jihui and KV, Gouthaman and Raj, Vishnu and Sinha, Nilotpal and Chen, Jingxi and Du, Fan and Manocha, Dinesh},
  title     = {{MASS}: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9433--9443}
}
MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors: Zhipeng Du,

Duolikun Danier,

Jan Eric Lenssen,

Hakan Bilen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Zhipeng and Danier, Duolikun and Lenssen, Jan Eric and Bilen, Hakan},
  title     = {{MoonSeg3R}: Monocular Online Zero-Shot Segment Anything in {3D} with Reconstructive Foundation Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7304--7314}
}
VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack: Shiji Zhao,

Shukun Xiong,

Yao Huang,

Jin Yan,

Zhenyu Wu,

Jiyang Guan,

Ranjie Duan,

Jialing Tao,

Hui Xue,

Xingxing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Shiji and Xiong, Shukun and Huang, Yao and Yan, Jin and Wu, Zhenyu and Guan, Jiyang and Duan, Ranjie and Tao, Jialing and Xue, Hui and Wei, Xingxing},
  title     = {{VRSA}: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9412--9421}
}
M-PhyGs: Multi-Material Object Dynamics from Video: Norika Wada,

Kohei Yamashita,

Ryo Kawahara,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wada_2026_CVPR,
  author    = {Wada, Norika and Yamashita, Kohei and Kawahara, Ryo and Nishino, Ko},
  title     = {{M-PhyGs}: Multi-Material Object Dynamics from Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6404--6413}
}
A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing: Maria Stoica,

Abdelrahman Hekal,

Alessio Lomuscio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stoica_2026_CVPR,
  author    = {Stoica, Maria and Hekal, Abdelrahman and Lomuscio, Alessio},
  title     = {A Robust Out-of-Distribution Detection Framework via Synergistic Smoothing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {685--694}
}
TokenErase: Robust Concept Erasure via Visual-Injected Token Optimization: Liangshun Zou,

Zhangkai Ni,

Hanli Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Liangshun and Ni, Zhangkai and Wang, Hanli},
  title     = {{TokenErase}: Robust Concept Erasure via Visual-Injected Token Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4129--4139}
}
Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers: Yash Belhe,

Wesley Chang,

Tzu-Mao Li,

Ravi Ramamoorthi,

Michaël Gharbi; [pdf] [supp]
[bibtex]
@InProceedings{Belhe_2026_CVPR,
  author    = {Belhe, Yash and Chang, Wesley and Li, Tzu-Mao and Ramamoorthi, Ravi and Gharbi, Micha{\"e}l},
  title     = {Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4089--4096}
}
Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation: Zhe Huang,

Hao Wen,

Aiming Hao,

Bingze Song,

Meiqi Wu,

Jiahong Wu,

Xiangxiang Chu,

Sheng Lu,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhe and Wen, Hao and Hao, Aiming and Song, Bingze and Wu, Meiqi and Wu, Jiahong and Chu, Xiangxiang and Lu, Sheng and Wang, Haoqian},
  title     = {Taming Hallucinations: Boosting {MLLMs}' Video Understanding via Counterfactual Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8153--8163}
}
Learning to Translate Noise for Robust Image Denoising: Inju Ha,

Donghun Ryou,

Seonguk Seo,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Inju and Ryou, Donghun and Seo, Seonguk and Han, Bohyung},
  title     = {Learning to Translate Noise for Robust Image Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5034--5043}
}
UGLMM: Towards Unified Vision Grounding with Large Multimodal Model: Xiangheng Shan,

Li Zhou,

Zenghui Sun,

Shichao Dong,

Nong Sang,

Jinsong Lan,

Xiaoyong Zhu,

Bo Zheng,

Changxin Gao,

Kaifu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Xiangheng and Zhou, Li and Sun, Zenghui and Dong, Shichao and Sang, Nong and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Gao, Changxin and Zhang, Kaifu},
  title     = {{UGLMM}: Towards Unified Vision Grounding with Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5683--5693}
}
MFI-ResNet: Efficient ResNet Architecture Optimization via MeanFlow Compression and Selective Incubation: Nuolin Sun,

Linyuan Wang,

Haonan Wei,

Lei Li,

Bin Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Nuolin and Wang, Linyuan and Wei, Haonan and Li, Lei and Yan, Bin},
  title     = {{MFI-ResNet}: Efficient {ResNet} Architecture Optimization via {MeanFlow} Compression and Selective Incubation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2691--2699}
}
VideoScaffold: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in MLLMs: Naishan Zheng,

Qingpei Guo,

Jie Huang,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Naishan and Guo, Qingpei and Huang, Jie and Zhao, Feng},
  title     = {{VideoScaffold}: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5947--5957}
}
Speed3R: Sparse Feed-forward 3D Reconstruction Models: Weining Ren,

Xiao Tan,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Weining and Tan, Xiao and Han, Kai},
  title     = {{Speed3R}: Sparse Feed-forward {3D} Reconstruction Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {119--128}
}
Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration: Bowen Tang,

Tao Wang,

Miao Zhang,

Xin Yu,

Jinwei Chen,

Bo Li,

Kaihao Zhang; [pdf]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bowen and Wang, Tao and Zhang, Miao and Yu, Xin and Chen, Jinwei and Li, Bo and Zhang, Kaihao},
  title     = {Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8175--8184}
}
FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning: Jintong Gao,

He Zhao,

Yibo Yang,

Dandan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jintong and Zhao, He and Yang, Yibo and Guo, Dandan},
  title     = {{FedNPC}: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7737--7746}
}
Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization PatchGAN: C. Zaghetto,

A. Purim,

W. Oliveira,

J. R. Ribeiro,

H. Nolla,

F. Santos,

M. Chang,

R. H. Vareto; [pdf] [supp]
[bibtex]
@InProceedings{Zaghetto_2026_CVPR,
  author    = {Zaghetto, C. and Purim, A. and Oliveira, W. and Ribeiro, J. R. and Nolla, H. and Santos, F. and Chang, M. and Vareto, R. H.},
  title     = {Fingerprint Fragment Expansion using Image Outpainting Approach based on Spectral Normalization {PatchGAN}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1221--1229}
}
PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation: Gabriele Rosi,

Fabio Cermelli,

Carlo Masone,

Barbara Caputo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosi_2026_CVPR,
  author    = {Rosi, Gabriele and Cermelli, Fabio and Masone, Carlo and Caputo, Barbara},
  title     = {{PrAda}: Few-Shot Visual Adaptation for Text-Prompted Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7326--7336}
}
GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models: Cong Ray,

Xiangwen Deng,

Feice Huang,

ZhengXian Wu,

Shen'ao Jiang,

Peng Jiao,

Zhifang Liu,

Haoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ray_2026_CVPR,
  author    = {Ray, Cong and Deng, Xiangwen and Huang, Feice and Wu, ZhengXian and Jiang, Shen'ao and Jiao, Peng and Liu, Zhifang and Wang, Haoqian},
  title     = {{GDP}: Graph-Based Dynamic Personalization for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9195--9204}
}
Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation: Yeongsu Kim,

Seo-Yeon Choi,

Kyungsu Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Yeongsu and Choi, Seo-Yeon and Lee, Kyungsu},
  title     = {Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8857--8867}
}
Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking: Yuichiro Takeuchi,

Yusuke Imoto,

Shunya Kato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Takeuchi_2026_CVPR,
  author    = {Takeuchi, Yuichiro and Imoto, Yusuke and Kato, Shunya},
  title     = {{Ninja Codes}: Neurally Generated Fiducial Markers for Stealthy {6-DoF} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6880--6889}
}
Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?: Jie Zhu,

Yiyang Su,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Jie and Su, Yiyang and Liu, Xiaoming},
  title     = {Can Textual Reasoning Improve the Performance of {MLLMs} on Fine-Grained Visual Classification?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9401--9411}
}
Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation: Jiaqi Bai,

Hongcheng Guo,

Jiaheng Liu,

Zhibo Zhou,

Jian Yang,

Feiran Huang; [pdf]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Jiaqi and Guo, Hongcheng and Liu, Jiaheng and Zhou, Zhibo and Yang, Jian and Huang, Feiran},
  title     = {Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9867--9877}
}
Trajectory-Diversity-Driven Robust Vision-and-Language Navigation: Jiangyang Li,

Cong Wan,

SongLin Dong,

Chenhao Ding,

Qiang Wang,

Zhiheng Ma,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiangyang and Wan, Cong and Dong, SongLin and Ding, Chenhao and Wang, Qiang and Ma, Zhiheng and Gong, Yihong},
  title     = {Trajectory-Diversity-Driven Robust Vision-and-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9143--9154}
}
Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting: Yunpeng Liu,

Xingzhong Hou,

Jie Wu,

Boxiao Liu,

Yi Zhang,

Guanglu Song,

Yu Liu,

Changyao Tian,

Gen Luo,

Haihang You; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yunpeng and Hou, Xingzhong and Wu, Jie and Liu, Boxiao and Zhang, Yi and Song, Guanglu and Liu, Yu and Tian, Changyao and Luo, Gen and You, Haihang},
  title     = {Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4678--4687}
}
Co-Adaptive Graph Learning Through Coupled Spectral Refinement for 3D Anomaly Detection: Hanvitha Saraswathi Mukkamala,

Arun K Pujari; [pdf] [supp]
[bibtex]
@InProceedings{Mukkamala_2026_CVPR,
  author    = {Mukkamala, Hanvitha Saraswathi and Pujari, Arun K},
  title     = {Co-Adaptive Graph Learning Through Coupled Spectral Refinement for {3D} Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1680--1689}
}
Efficient3D: A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs: Yuhui Lin,

Siyue Yu,

Yuxing Yang,

Guangliang Cheng,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yuhui and Yu, Siyue and Yang, Yuxing and Cheng, Guangliang and Xiao, Jimin},
  title     = {{Efficient3D}: A Unified Framework for Adaptive and Debiased Token Reduction in {3D} {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8941--8951}
}
GauSDF: Signed Distance Embedded Gaussian Surfels for 3D Reconstruction: Minsol Kim,

Usman Ali; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minsol and Ali, Usman},
  title     = {{GauSDF}: Signed Distance Embedded {Gaussian} Surfels for {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {33--42}
}
Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection: Wanqi Wang,

Jingcai Guo,

Yuxiang Cai,

Zhi Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Wanqi and Guo, Jingcai and Cai, Yuxiang and Chen, Zhi},
  title     = {Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7747--7757}
}
Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning: Fankang Xu,

Lu Jin,

Yanpeng Sun,

Shiyu Xuan,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao},
  title     = {Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7717--7727}
}
NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation: Quang Dang Huynh,

Xuefei Yin,

Andrew Busch,

Hugo G. Espinosa,

Alan Wee-Chung Liew,

Matthew T.O. Worsey,

Yanming Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2026_CVPR,
  author    = {Huynh, Quang Dang and Yin, Xuefei and Busch, Andrew and Espinosa, Hugo G. and Liew, Alan Wee-Chung and Worsey, Matthew T. O. and Zhu, Yanming},
  title     = {{NCSTR}: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8368--8377}
}
DynaMind: Reconstructing Dynamic Visual Scenes from EEG by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion: Junxiang Liu,

Junming Lin,

Jie Zhou,

Wei Xiong,

Jiangtong Li,

Jie Li,

Jie Zhuang,

Hongfei Ji; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Junxiang and Lin, Junming and Zhou, Jie and Xiong, Wei and Li, Jiangtong and Li, Jie and Zhuang, Jie and Ji, Hongfei},
  title     = {{DynaMind}: Reconstructing Dynamic Visual Scenes from {EEG} by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5378--5387}
}
AndroidLens: Long-latency Evaluation with Nested Sub-targets for Android GUI Agents: Yue Cao,

Yingyao Wang,

Pi Bu,

Jingxuan Xing,

Wei Jiang,

Zekun Zhu,

Junpeng Ma,

Sashuai Zhou,

Tong Lu,

Jun Song,

Yu Cheng,

Yuning Jiang,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yue and Wang, Yingyao and Bu, Pi and Xing, Jingxuan and Jiang, Wei and Zhu, Zekun and Ma, Junpeng and Zhou, Sashuai and Lu, Tong and Song, Jun and Cheng, Yu and Jiang, Yuning and Zheng, Bo},
  title     = {{AndroidLens}: Long-latency Evaluation with Nested Sub-targets for {Android} {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1903--1912}
}
TP-Seg: Task-Prototype Framework for Unified Medical Lesion Segmentation: Jiawei Xu,

Qiangqiang Zhou,

Dandan Zhu,

Yong Chen,

Yugen Yi,

Xiaoqi Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiawei and Zhou, Qiangqiang and Zhu, Dandan and Chen, Yong and Yi, Yugen and Zhao, Xiaoqi},
  title     = {{TP-Seg}: Task-Prototype Framework for Unified Medical Lesion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5452--5462}
}
C^2T: Captioning-Structure and LLM-Aligned Common-Sense Reward Learning for Traffic-Vehicle Coordination: Yuyang Chen,

Kaiyan Zhao,

Yiming Wang,

Ming Yang,

Bin Rao,

Zhenning Li; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuyang and Zhao, Kaiyan and Wang, Yiming and Yang, Ming and Rao, Bin and Li, Zhenning},
  title     = {{C$^2$T}: Captioning-Structure and {LLM-Aligned} Common-Sense Reward Learning for Traffic-Vehicle Coordination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1128--1137}
}
OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition: Junhui Yin,

Zhizhen Cai,

Puze Wang,

Guanzhou Ke,

Jianhua Yang,

Man Zhang,

Qiang Zhang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Junhui and Cai, Zhizhen and Wang, Puze and Ke, Guanzhou and Yang, Jianhua and Zhang, Man and Zhang, Qiang and He, Shengfeng},
  title     = {{OKGraph}: Online Knowledge Graph Probing for Open-vocabulary Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6717--6726}
}
Generative Event Pretraining with Foundation Model Alignment: Jianwen Cao,

Jiaxu Xing,

Nico Messikommer,

Davide Scaramuzza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jianwen and Xing, Jiaxu and Messikommer, Nico and Scaramuzza, Davide},
  title     = {Generative Event Pretraining with Foundation Model Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3189--3199}
}
RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning: Suhang Hu,

Wei Hu,

Yuhang Su,

Fan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Suhang and Hu, Wei and Su, Yuhang and Zhang, Fan},
  title     = {{RISE}: Enhancing {VLM} Image Annotation with Self-Supervised Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9878--9887}
}
Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss: Hongye Xu,

Bartosz Krawczyk; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Hongye and Krawczyk, Bartosz},
  title     = {Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7852--7861}
}
DARTS: Distance-Aware Robust Training for Selective Classification: A. Q. M. Sazzad Sayyed,

Nathaniel D. Bastian,

Francesco Restuccia; [pdf] [supp]
[bibtex]
@InProceedings{Sayyed_2026_CVPR,
  author    = {Sayyed, A. Q. M. Sazzad and Bastian, Nathaniel D. and Restuccia, Francesco},
  title     = {{DARTS}: Distance-Aware Robust Training for Selective Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8806--8815}
}
Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model: Zhiming Liu,

Yujie Wei,

Lei Feng,

Xiu Su,

Xiaobo Xia,

Weili Guan,

Zeke Xie,

Shuo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhiming and Wei, Yujie and Feng, Lei and Su, Xiu and Xia, Xiaobo and Guan, Weili and Xie, Zeke and Yang, Shuo},
  title     = {Do All Individual Layers Help? {An} Empirical Study of Task-Interfering Layers in Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9597--9607}
}
D^2-STX: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting: Xiaoai Wang,

Hang Wang,

Yan Liu,

Huan Hu,

Bruce X.B. Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaoai and Wang, Hang and Liu, Yan and Hu, Huan and Yu, Bruce X. B.},
  title     = {{D$^2$-STX}: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8205--8214}
}
Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control: Linxi Xie,

Lisong C. Sun,

Ashley Neall,

Tong Wu,

Shengqu Cai,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Linxi and Sun, Lisong C. and Neall, Ashley and Wu, Tong and Cai, Shengqu and Wetzstein, Gordon},
  title     = {Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3998--4008}
}
Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models: Gracjan Goral,

Alicja Ziarko,

Piotr Milos,

Michal Nauman,

Maciej Wolczyk,

Michal Kosinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goral_2026_CVPR,
  author    = {Goral, Gracjan and Ziarko, Alicja and Milos, Piotr and Nauman, Michal and Wolczyk, Maciej and Kosinski, Michal},
  title     = {Beyond Recognition: Evaluating Visual Perspective Taking in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1721--1730}
}
Parallel In-context Learning for Large Vision Language Models: Shin'ya Yamaguchi,

Daiki Chijiwa,

Tamao Sakao,

Taku Hasegawa; [pdf] [arXiv]
[bibtex]
@InProceedings{Yamaguchi_2026_CVPR,
  author    = {Yamaguchi, Shin'ya and Chijiwa, Daiki and Sakao, Tamao and Hasegawa, Taku},
  title     = {Parallel In-context Learning for Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5796--5806}
}
Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing: Kuo Yang,

Jianglin Lu,

Yun Fu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kuo and Lu, Jianglin and Fu, Yun},
  title     = {Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5704--5713}
}
BLMT-Stereo: Breaking the Local Minima Trap of Iterative Stereo Matching: Zhien Dai,

Zhaohui Tang,

Hu Zhang,

Mingjun Pan,

Jin Luo,

Yongfang Xie; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Zhien and Tang, Zhaohui and Zhang, Hu and Pan, Mingjun and Luo, Jin and Xie, Yongfang},
  title     = {{BLMT-Stereo}: Breaking the Local Minima Trap of Iterative Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1--11}
}
mmDiff: A Noise-Robust Differentiable Ray-Tracing Framework for mmWave Scene Calibration and Channel Prediction: Haofan Lu,

Yadi Cao,

Wanghao Yi,

Omid Abari; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haofan and Cao, Yadi and Yi, Wanghao and Abari, Omid},
  title     = {{mmDiff}: A Noise-Robust Differentiable Ray-Tracing Framework for {mmWave} Scene Calibration and Channel Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6435--6444}
}
MARIO: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry: Yiquan Li,

Taeyoung Yeon,

Chenfeng Gao,

Vasco Xu,

Xuanyou Liu,

Karan Ahuja; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiquan and Yeon, Taeyoung and Gao, Chenfeng and Xu, Vasco and Liu, Xuanyou and Ahuja, Karan},
  title     = {{MARIO}: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3460--3469}
}
A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation: Yajun Liu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajun},
  title     = {A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5255--5264}
}
FACT-GS: Frequency-Aligned Complexity-Aware Texture Reparameterization for 2D Gaussian Splatting: Tianhao Xie,

Linlian Jiang,

Xinxin Zuo,

Yang Wang,

Tiberiu Popa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tianhao and Jiang, Linlian and Zuo, Xinxin and Wang, Yang and Popa, Tiberiu},
  title     = {{FACT-GS}: Frequency-Aligned Complexity-Aware Texture Reparameterization for {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {202--212}
}
Efficient Unlearning through Maximizing Relearning Convergence Delay: Khoa Tran,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Khoa and Woo, Simon S.},
  title     = {Efficient Unlearning through Maximizing Relearning Convergence Delay},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7968--7977}
}
Deep Parameter Interpolation for Scalar Conditioning: Chicago Y. Park,

Michael T. McCann,

Cristina Garcia-Cardona,

Brendt Wohlberg,

Ulugbek S. Kamilov; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Chicago Y. and McCann, Michael T. and Garcia-Cardona, Cristina and Wohlberg, Brendt and Kamilov, Ulugbek S.},
  title     = {Deep Parameter Interpolation for Scalar Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3884--3892}
}
Zero-Shot Textual Explanations via Translating Decision-Critical Features: Toshinori Yamauchi,

Hiroshi Kera,

Kazuhiko Kawamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamauchi_2026_CVPR,
  author    = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Zero-Shot Textual Explanations via Translating Decision-Critical Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3282--3292}
}
Deep Feedback ConvNets by Embedding the Working Memory Module for Image Classification: Lulu Fang,

Jiaxiang Qin,

Ruiheng Yan,

Ning Pan,

Haihua Liu,

Xinxin Chen; [pdf]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Lulu and Qin, Jiaxiang and Yan, Ruiheng and Pan, Ning and Liu, Haihua and Chen, Xinxin},
  title     = {Deep Feedback {ConvNets} by Embedding the Working Memory Module for Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2564--2574}
}
HumanOrbit: 3D Human Reconstruction as 360° Orbit Generation: Keito Suzuki,

Kunyao Chen,

Lei Wang,

Bang Du,

Runfa Blark Li,

Peng Liu,

Ning Bi,

Truong Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Suzuki_2026_CVPR,
  author    = {Suzuki, Keito and Chen, Kunyao and Wang, Lei and Du, Bang and Li, Runfa Blark and Liu, Peng and Bi, Ning and Nguyen, Truong},
  title     = {{HumanOrbit}: {3D} Human Reconstruction as 360deg Orbit Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {624--634},
}
Channel Correlation Loss for Binary Neural Networks: Xindi Zuo,

Wei Zhang,

Hai Yu,

Zhiliang Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Xindi and Zhang, Wei and Yu, Hai and Zhu, Zhiliang},
  title     = {Channel Correlation Loss for Binary Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2575--2584},
}
Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation: Jacey Somers,

Harrison Zale,

Janine Mason,

Tina Walker,

Eddie Quinn,

Felix Lewis,

Gavin Wright,

Yvonne Young,

Charles Sullivan,

Wayne Carter,

Julian Foster; [pdf]
[bibtex]
@InProceedings{Somers_2026_CVPR,
  author    = {Somers, Jacey and Zale, Harrison and Mason, Janine and Walker, Tina and Quinn, Eddie and Lewis, Felix and Wright, Gavin and Young, Yvonne and Sullivan, Charles and Carter, Wayne and Foster, Julian},
  title     = {{Video Inspector}: An Agentic-{RL} Framework and Benchmark for Human-Aligned Generative Video Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8534--8543},
}
Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding: Yuefei Chen,

Jiang Liu,

Xiaodong Lin,

Ruixiang Tang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuefei and Liu, Jiang and Lin, Xiaodong and Tang, Ruixiang},
  title     = {Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9165--9174},
}
JACoP: Joint Alignment for Compliant Multi-Agent Prediction: Qingze Tony Liu,

Alen Mrdovic,

Danrui Li,

Mathew Schwartz,

Sejong Yoon,

Mubbasir Kapadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Qingze Tony and Mrdovic, Alen and Li, Danrui and Schwartz, Mathew and Yoon, Sejong and Kapadia, Mubbasir},
  title     = {{JACoP}: Joint Alignment for Compliant Multi-Agent Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {910--919},
}
Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with SAM: Jingxuan Kang,

Ziqi Zhang,

Shaoming Zheng,

Shuang Li,

Uday Bharat Patel,

Alexander Harry Fitzhugh,

Phillip Lung,

Yusuf Kiberu,

Nikesh Jathanna,

Shahnaz Jamil-Copley,

Bernhard Kainz,

Chen Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jingxuan and Zhang, Ziqi and Zheng, Shaoming and Li, Shuang and Patel, Uday Bharat and Fitzhugh, Alexander Harry and Lung, Phillip and Kiberu, Yusuf and Jathanna, Nikesh and Jamil-Copley, Shahnaz and Kainz, Bernhard and Qin, Chen},
  title     = {Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with {SAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5224--5234},
}
Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models: Songlong Xing,

Weijie Wang,

Zhengyu Zhao,

Jindong Gu,

Philip Torr,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Songlong and Wang, Weijie and Zhao, Zhengyu and Gu, Jindong and Torr, Philip and Sebe, Nicu},
  title     = {Finetune Like You Pretrain: Boosting Zero-shot Adversarial Robustness in Vision-language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {737--747},
}
LLM Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization: Chen Fu,

Shengzhou Yi,

Ling Xiao,

Toshihiko Yamasaki; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chen and Yi, Shengzhou and Xiao, Ling and Yamasaki, Toshihiko},
  title     = {{LLM} Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5725--5734},
}
Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training: Hu Lin,

Chengjiang Long,

Jiqing Zhang,

Chuanlu Jiang,

Huilin Ge,

Erwei Yin,

Baocai Yin,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Hu and Long, Chengjiang and Zhang, Jiqing and Jiang, Chuanlu and Ge, Huilin and Yin, Erwei and Yin, Baocai and Yang, Xin},
  title     = {Unlocking Single-View Constraints for Efficient Camera Relocalization with Keypoint-Level Multi-View Geometric Consistency in Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1335--1345},
}
Revisiting Articulated Parts Perception in Robot Manipulation: Xiaoqian Wu,

Yejie Guo,

Xiaoyang Chen,

Lixin Yang,

Cewu Lu,

Yong-Lu Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaoqian and Guo, Yejie and Chen, Xiaoyang and Yang, Lixin and Lu, Cewu and Li, Yong-Lu},
  title     = {Revisiting Articulated Parts Perception in Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1368--1377},
}
Seeing Helps Reasoning in Language Models: Yulu Gan,

Kaiya Ivy Zhao,

Tomaso Poggio,

Phillip Isola; [pdf] [supp]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Yulu and Zhao, Kaiya Ivy and Poggio, Tomaso and Isola, Phillip},
  title     = {Seeing Helps Reasoning in Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7080--7090},
}
FlowSteer: Conditioning Flow Field for Consistent Image Restoration: Tharindu Wickremasinghe,

Chenyang Qi,

Harshana Weligampola,

Zhengzhong Tu,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wickremasinghe_2026_CVPR,
  author    = {Wickremasinghe, Tharindu and Qi, Chenyang and Weligampola, Harshana and Tu, Zhengzhong and Chan, Stanley H.},
  title     = {{FlowSteer}: Conditioning Flow Field for Consistent Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5106--5116},
}
DraCo: Draft as CoT for Text-to-Image Preview and Rare Concept Generation: Dongzhi Jiang,

Renrui Zhang,

Haodong Li,

Zhuofan Zong,

Ziyu Guo,

Jun He,

Claire Guo,

Junyan Ye,

Rongyao Fang,

Weijia Li,

Rui Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Dongzhi and Zhang, Renrui and Li, Haodong and Zong, Zhuofan and Guo, Ziyu and He, Jun and Guo, Claire and Ye, Junyan and Fang, Rongyao and Li, Weijia and Liu, Rui and Li, Hongsheng},
  title     = {{DraCo}: Draft as {CoT} for Text-to-Image Preview and Rare Concept Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5903--5914},
}
Vision Language Models are Confused Tourists: Patrick Amadeus Irawan,

Ikhlasul Akmal Hanif,

Muhammad Dehan Al Kautsar,

Genta Indra Winata,

Fajri Koto,

Alham Fikri Aji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Irawan_2026_CVPR,
  author    = {Irawan, Patrick Amadeus and Hanif, Ikhlasul Akmal and Al Kautsar, Muhammad Dehan and Winata, Genta Indra and Koto, Fajri and Aji, Alham Fikri},
  title     = {Vision Language Models are Confused Tourists},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1763--1773},
}
TAPNext++: What's Next for Tracking Any Point (TAP)?: Sebastian Jung,

Artem Zholus,

Martin Sundermeyer,

Carl Doersch,

Ross Goroshin,

David Joseph Tan,

Sarath Chandar,

Rudolph Triebel,

Federico Tombari; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Sebastian and Zholus, Artem and Sundermeyer, Martin and Doersch, Carl and Goroshin, Ross and Tan, David Joseph and Chandar, Sarath and Triebel, Rudolph and Tombari, Federico},
  title     = {{TAPNext++}: What's Next for Tracking Any Point ({TAP})?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8429--8438},
}
RelativeFlow: Taming Medical Image Denoising Learning with Noisy Reference: Yuxin Liu,

Yiqing Dong,

Wenxue Yu,

Zhan Wu,

Rongjun Ge,

Yang Chen,

Yuting He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxin and Dong, Yiqing and Yu, Wenxue and Wu, Zhan and Ge, Rongjun and Chen, Yang and He, Yuting},
  title     = {{RelativeFlow}: Taming Medical Image Denoising Learning with Noisy Reference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5673--5682},
}
HypHOI: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection: Yixin Guo,

Yu Liu,

Weimin Wang,

Yanming Guo,

Qi Jia; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yixin and Liu, Yu and Wang, Weimin and Guo, Yanming and Jia, Qi},
  title     = {{HypHOI}: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6517--6527},
}
Drive-Cascade: Autoregressive Occupancy to LiDAR and Video Synthesis: Shuangming Lei,

Yuehao Huang,

Yao Yi,

Yijia Xie,

Jingke Wang,

Ruoyu Wang,

Jiajun Lv,

Guanglin Xu,

AiXue Ye,

Bingbing Liu,

Siyuan Cheng,

Hongbo Zhang,

Yukai Ma,

Yong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Shuangming and Huang, Yuehao and Yi, Yao and Xie, Yijia and Wang, Jingke and Wang, Ruoyu and Lv, Jiajun and Xu, Guanglin and Ye, AiXue and Liu, Bingbing and Cheng, Siyuan and Zhang, Hongbo and Ma, Yukai and Liu, Yong},
  title     = {{Drive-Cascade}: Autoregressive Occupancy to {LiDAR} and Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4552--4561},
}
From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage: Cihan Ruan,

Lebin Zhou,

Bingqing Zhao,

Rongduo Han,

Qiming Yuan,

Chenchen Zhu,

Linyi Han,

Liang Yang,

Wei Wang,

Wei Jiang,

Nam Ling; [pdf] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Cihan and Zhou, Lebin and Zhao, Bingqing and Han, Rongduo and Yuan, Qiming and Zhu, Chenchen and Han, Linyi and Yang, Liang and Wang, Wei and Jiang, Wei and Ling, Nam},
  title     = {From Pixels to Nucleotides: End-to-End Token-Based Video Compression for {DNA} Storage},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8544--8553},
}
A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing: Shiwei Ding,

Lan Zhang,

Zhenlin Wang,

Xiaoyong Yuan; [pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Shiwei and Zhang, Lan and Wang, Zhenlin and Yuan, Xiaoyong},
  title     = {A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8039--8048},
}
ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation: Wenyang Chen,

Zhanxuan Hu,

Yaping Zhang,

Hailong Ning,

Yonghang Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wenyang and Hu, Zhanxuan and Zhang, Yaping and Ning, Hailong and Tai, Yonghang},
  title     = {{ConInfer}: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7408--7418},
}
Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in MLLMs: Tianle Chen,

Chaitanya Chakka,

Arjun Reddy Akula,

Xavier Thomas,

Deepti Ghadiyaram; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tianle and Chakka, Chaitanya and Akula, Arjun Reddy and Thomas, Xavier and Ghadiyaram, Deepti},
  title     = {Some Modalities are More Equal Than Others: Decoding and Architecting Multimodal Integration in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2142--2151},
}
Discovering Attention Head Interactions in Vision Transformers: Zhenyu Lu,

Yuheng Jia,

Wei You,

Hao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zhenyu and Jia, Yuheng and You, Wei and Chen, Hao},
  title     = {Discovering Attention Head Interactions in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3332--3342},
}
Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination: Yolo Yunlong Tang,

Daiki Shimada,

Hang Hua,

Chao Huang,

Jing Bi,

Rogerio Feris,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yolo Yunlong and Shimada, Daiki and Hua, Hang and Huang, Chao and Bi, Jing and Feris, Rogerio and Xu, Chenliang},
  title     = {{Video-R4}: Reinforcing Text-Rich Video Reasoning with Visual Rumination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8314--8325},
}
Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning: Yushuo Zheng,

Huiyu Duan,

Zicheng Zhang,

Xiaohong Liu,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yushuo and Duan, Huiyu and Zhang, Zicheng and Liu, Xiaohong and Min, Xiongkuo},
  title     = {Learning to Wander: Improving the Global Image Geolocation Ability of {LMMs} via Actionable Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7208--7219},
}
Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework: Hongrui Jia,

Chaoya Jiang,

Shikun Zhang,

Wei Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Hongrui and Jiang, Chaoya and Zhang, Shikun and Ye, Wei},
  title     = {Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9574--9585},
}
MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer: Jian Zhong,

Yifan Jiao,

Xi Shao,

Bing-Kun Bao; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Jian and Jiao, Yifan and Shao, Xi and Bao, Bing-Kun},
  title     = {{MuSCM}: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7758--7767},
}
SwiftPie: Lightning-fast Subject-driven Image Personalization via One step Diffusion: Huy Duong,

Trong-Tung Nguyen,

Cuong Pham,

Anh Tran,

Khoi Nguyen,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duong_2026_CVPR,
  author    = {Duong, Huy and Nguyen, Trong-Tung and Pham, Cuong and Tran, Anh and Nguyen, Khoi and Hoai, Minh},
  title     = {{SwiftPie}: Lightning-fast Subject-driven Image Personalization via One step Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4708--4718},
}
When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models: Jianxin Lin,

Chunzheng Zhu,

Peter J Kneuertz,

Yunfei Bai,

Yuan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jianxin and Zhu, Chunzheng and Kneuertz, Peter J and Bai, Yunfei and Xue, Yuan},
  title     = {When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5556--5568},
}
MVSSM: Motion-aware Visual State Space Model for Efficient Video Deblurring: Chen Zhou,

Tao Wu,

Wei Liu,

Xi Wu,

Ying Fu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Chen and Wu, Tao and Liu, Wei and Wu, Xi and Fu, Ying},
  title     = {{MVSSM}: Motion-aware Visual State Space Model for Efficient Video Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4855--4865},
}
Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling: Camille Biscarrat,

Michaël Gharbi,

Rahul Goel,

Jonathan Ragan-Kelley,

Frédo Durand,

Tzu-Mao Li; [pdf] [supp]
[bibtex]
@InProceedings{Biscarrat_2026_CVPR,
  author    = {Biscarrat, Camille and Gharbi, Micha{\"e}l and Goel, Rahul and Ragan-Kelley, Jonathan and Durand, Fr{\'e}do and Li, Tzu-Mao},
  title     = {Adaptive Continuous Kernel Networks for Image Reconstruction from Non-Uniform Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1283--1293},
}
From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity: Haoming Liu,

Jinnuo Liu,

Yanhao Li,

Liuyang Bai,

Yunkai Ji,

Yuanhe Guo,

Shenji Wan,

Hongyi Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haoming and Liu, Jinnuo and Li, Yanhao and Bai, Liuyang and Ji, Yunkai and Guo, Yuanhe and Wan, Shenji and Wen, Hongyi},
  title     = {From Navigation to Refinement: Revealing the Two-Stage Nature of Flow-based Diffusion Models through Oracle Velocity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2649--2658},
}
HAMSA: Scanning-Free Vision State Space Models via SpectralPulseNet: Badri N Patro,

Vijay S Agneeswaran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patro_2026_CVPR,
  author    = {Patro, Badri N and Agneeswaran, Vijay S},
  title     = {{HAMSA}: Scanning-Free Vision State Space Models via {SpectralPulseNet}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2408--2418},
}
Learning to Select Visual In-Context Demonstrations: Eugene Lee,

Yu-Chi Lin,

Jiajie Diao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Eugene and Lin, Yu-Chi and Diao, Jiajie},
  title     = {Learning to Select Visual In-Context Demonstrations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9455--9465},
}
FSMC-Pose: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation: Fangjing Li,

Zhihai Wang,

Xinxin Ding,

Haiyang Liu,

Ronghua Gao,

Rong Wang,

Yao Zhu,

Ming Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Fangjing and Wang, Zhihai and Ding, Xinxin and Liu, Haiyang and Gao, Ronghua and Wang, Rong and Zhu, Yao and Jin, Ming},
  title     = {{FSMC-Pose}: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3620--3629},
}
Improving Densification in 3D Gaussian Splatting for High-Fidelity Rendering: Xiaobin Deng,

Changyu Diao,

Min Li,

Ruohan Yu,

Duanqing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiaobin and Diao, Changyu and Li, Min and Yu, Ruohan and Xu, Duanqing},
  title     = {Improving Densification in {3D} {Gaussian} Splatting for High-Fidelity Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {223--232},
}
ZODS-RS -- Zero-Training Oriented Detection & Segmentation for Remote Sensing: Zuan Gu,

Tianhan Gao,

Langxu Zhao; [pdf]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zuan and Gao, Tianhan and Zhao, Langxu},
  title     = {{ZODS-RS} -- Zero-Training Oriented Detection \& Segmentation for Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6322--6330},
}
Concise Geometric Description as a Bridge: Unleashing the Potential of LLM for Plane Geometric Problem Solving: Jingyun Wang,

Dian Li,

Xiaohan Wang,

Gang Liu,

Jiahong Yan,

Guoliang Kang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jingyun and Li, Dian and Wang, Xiaohan and Liu, Gang and Yan, Jiahong and Kang, Guoliang},
  title     = {Concise Geometric Description as a Bridge: Unleashing the Potential of {LLM} for Plane Geometric Problem Solving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5958--5967},
}
Rich Feature Learning via Diversification: Xi Leng,

Yongqiang Chen,

Xiaoying Tang,

Yatao Bian; [pdf] [supp]
[bibtex]
@InProceedings{Leng_2026_CVPR,
  author    = {Leng, Xi and Chen, Yongqiang and Tang, Xiaoying and Bian, Yatao},
  title     = {Rich Feature Learning via Diversification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2462--2472},
}
MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation: Ziyi Wang,

Xianping Ma,

Ziyao Wang,

Hongyang Zhang,

Man On Pun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyi and Ma, Xianping and Wang, Ziyao and Zhang, Hongyang and Pun, Man On},
  title     = {{MPerS}: Dynamic {MLLM} {MixExperts} Perception-Guided Remote Sensing Scene Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7251--7261},
}
Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization: Qiji Ma,

Chuanguang Yang,

Zhulin An,

Libo Huang,

Erhu Zhao,

Yuqi Li,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Qiji and Yang, Chuanguang and An, Zhulin and Huang, Libo and Zhao, Erhu and Li, Yuqi and Xu, Yongjun},
  title     = {Distilling Out-of-Distribution Knowledge from Large Language Models for {CLIP} Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9531--9541},
}
SyncTrack4D: Cross-Video Motion Alignment and Video Synchronization with Multi-Video 4D Gaussian Splatting: Yonghan Lee,

Tsung-Wei Huang,

Shiv Gehlot,

Jaehoon Choi,

Guan-Ming Su,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yonghan and Huang, Tsung-Wei and Gehlot, Shiv and Choi, Jaehoon and Su, Guan-Ming and Manocha, Dinesh},
  title     = {{SyncTrack4D}: Cross-Video Motion Alignment and Video Synchronization with Multi-Video {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {77--87},
}
When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization: Md Zarif Hossain,

Awal Ahmed Fime,

Ahmed Imteaj; [pdf] [supp]
[bibtex]
@InProceedings{Hossain_2026_CVPR,
  author    = {Hossain, Md Zarif and Fime, Awal Ahmed and Imteaj, Ahmed},
  title     = {When Data is Scarce, Learn to Adapt: Robust Federated Learning via Adversarial Meta-Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {758--767},
}
PTAD: Pose and Texture Agnostic Anomaly Detection: Wei Zhuo,

Jianen Xiang,

Miaomiao Liu,

Huajun Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhuo_2026_CVPR,
  author    = {Zhuo, Wei and Xiang, Jianen and Liu, Miaomiao and Lu, Huajun},
  title     = {{PTAD}: Pose and Texture Agnostic Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6779--6788},
}
Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data: Yizhao Xu,

Hongyuan Zhu,

Caiyun Liu,

Tianfu Wang,

Keyu Chen,

Sicheng Xu,

Jiaolong Yang,

Nicholas Jing Yuan,

Qi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yizhao and Zhu, Hongyuan and Liu, Caiyun and Wang, Tianfu and Chen, Keyu and Xu, Sicheng and Yang, Jiaolong and Yuan, Nicholas Jing and Zhang, Qi},
  title     = {Beyond Voxel {3D} Editing: Learning from {3D} Masks and Self-Constructed Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {635--646},
}
Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation: Junhao Xia,

Chaoyang Zhang,

Yecheng Zhang,

Chengyang Zhou,

Zhichang Wang,

Bochun Liu,

Dongshuo Yin; [pdf]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Junhao and Zhang, Chaoyang and Zhang, Yecheng and Zhou, Chengyang and Wang, Zhichang and Liu, Bochun and Yin, Dongshuo},
  title     = {Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8607--8617},
}
BadVLM: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models: Ba Luan Dang,

Vu Tuan Truong,

Long Bao Le; [pdf] [supp]
[bibtex]
@InProceedings{Dang_2026_CVPR,
  author    = {Dang, Ba Luan and Truong, Vu Tuan and Le, Long Bao},
  title     = {{BadVLM}: Towards Efficient and Resilient Backdoor Attacks on Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {726--736},
}
GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework: Pengyu Zeng,

Yuqin Dai,

Jun Yin,

Jing Zhong,

Ziyang Han,

Chaoyang Shi,

ZhanXiang Jin,

Maowei Jiang,

Yuxing Han,

Shuai Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Pengyu and Dai, Yuqin and Yin, Jun and Zhong, Jing and Han, Ziyang and Shi, Chaoyang and Jin, ZhanXiang and Jiang, Maowei and Han, Yuxing and Lu, Shuai},
  title     = {{GreenPlanner}: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8596--8606},
}
Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space: Kaiwen Wang,

Kaili Zheng,

Yiming Shi,

Chenyi Guo,

Ji Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kaiwen and Zheng, Kaili and Shi, Yiming and Guo, Chenyi and Wu, Ji},
  title     = {Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3532--3542},
}
Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization: Xiaoxi Yang,

Bo Sun,

Yisheng An,

Ganchao Liu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xiaoxi and Sun, Bo and An, Yisheng and Liu, Ganchao},
  title     = {Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7188--7197},
}
Meta-CDMTransNet: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification: Anindita Mohanta,

Sourav Dey Roy,

Priya Saha,

Niharika Nath,

Mrinal Kanti Bhowmik; [pdf]
[bibtex]
@InProceedings{Mohanta_2026_CVPR,
  author    = {Mohanta, Anindita and Roy, Sourav Dey and Saha, Priya and Nath, Niharika and Bhowmik, Mrinal Kanti},
  title     = {{Meta-CDMTransNet}: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5525--5534},
}
PoM: A Linear-Time Replacement for Attention with the Polynomial Mixer: David Picard,

Nicolas Dufour,

Lucas Degeorge,

Arijit Ghosh,

Davide Allegro,

Tom Ravaud,

Yohann Perron,

Corentin Sautier,

Zeynep Sonat Baltaci,

Fei Meng,

Syrine Kalleli,

Marta López-Rauhut,

Thibaut Loiseau,

Ségolène Albouy,

Raphael Baena,

Elliot Vincent,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Picard_2026_CVPR,
  author    = {Picard, David and Dufour, Nicolas and Degeorge, Lucas and Ghosh, Arijit and Allegro, Davide and Ravaud, Tom and Perron, Yohann and Sautier, Corentin and Baltaci, Zeynep Sonat and Meng, Fei and Kalleli, Syrine and L{\'o}pez-Rauhut, Marta and Loiseau, Thibaut and Albouy, S{\'e}gol{\`e}ne and Baena, Raphael and Vincent, Elliot and Landrieu, Loic},
  title     = {{PoM}: A Linear-Time Replacement for Attention with the Polynomial Mixer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2544--2553},
}
D4C: Data-Free Quantization for Contrastive Language-Image Pre-Training Models: Wenlun Zhang,

Yunshan Zhong,

Zihao Ding,

Xinyu Li,

Kentaro Yoshioka; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenlun and Zhong, Yunshan and Ding, Zihao and Li, Xinyu and Yoshioka, Kentaro},
  title     = {{D4C}: Data-Free Quantization for Contrastive Language-Image Pre-Training Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2978--2987}
}
Label-Agnostic Category Discovery: Yuwei Bian,

Shidong Wang,

Chunming Li,

Haofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2026_CVPR,
  author    = {Bian, Yuwei and Wang, Shidong and Li, Chunming and Zhang, Haofeng},
  title     = {Label-Agnostic Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7573--7582}
}
CATS-V2V: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios: Hangyu Li,

Bofeng Cao,

Zhaohui Liang,

Wuzhen Li,

Juyoung Oh,

Yuxuan Chen,

Shixiao Liang,

Hang Zhou,

Chengyuan Ma,

Jiaxi Liu,

Zheng Li,

Peng Zhang,

Keke Long,

Maolin Liu,

Jackson Jiang,

Chunlei Yu,

Shengxiang Liu,

Hongkai Yu,

Xiaopeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hangyu and Cao, Bofeng and Liang, Zhaohui and Li, Wuzhen and Oh, Juyoung and Chen, Yuxuan and Liang, Shixiao and Zhou, Hang and Ma, Chengyuan and Liu, Jiaxi and Li, Zheng and Zhang, Peng and Long, Keke and Liu, Maolin and Jiang, Jackson and Yu, Chunlei and Liu, Shengxiang and Yu, Hongkai and Li, Xiaopeng},
  title     = {{CATS-V2V}: A Real-World Vehicle-to-Vehicle Cooperative Perception Dataset with Complex Adverse Traffic Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2294--2303}
}
Autoregressive Universal Video Segmentation Model: Miran Heo,

Sukjun Hwang,

Min-Hung Chen,

Yu-Chiang Frank Wang,

Albert Gu,

Seon Joo Kim,

Ryo Hachiuma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR,
  author    = {Heo, Miran and Hwang, Sukjun and Chen, Min-Hung and Wang, Yu-Chiang Frank and Gu, Albert and Kim, Seon Joo and Hachiuma, Ryo},
  title     = {Autoregressive Universal Video Segmentation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7429--7438}
}
RoboTransfer: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer: Liu Liu,

Xiaofeng Wang,

Guosheng Zhao,

Keyu Li,

Wenkang Qin,

Jiagang Zhu,

Jiaxiong Qiu,

Guan Huang,

Zhizhong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Liu and Wang, Xiaofeng and Zhao, Guosheng and Li, Keyu and Qin, Wenkang and Zhu, Jiagang and Qiu, Jiaxiong and Huang, Guan and Su, Zhizhong},
  title     = {{RoboTransfer}: Controllable Geometry-Consistent Video Diffusion for Manipulation Policy Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1410--1420}
}
LiteEmbed: Adapting CLIP to Rare Classes: Aishwarya Agarwal,

Srikrishna Karanam,

Vineet Gandhi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agarwal_2026_CVPR,
  author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
  title     = {{LiteEmbed}: Adapting {CLIP} to Rare Classes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6133--6142}
}
HAFM: A Post-Fusion Gating Module for Haze-Aware RGB-Thermal Object Detection: Juan M. Saeteros,

Nick J. Arévalo,

Boris X. Vintimilla; [pdf] [supp]
[bibtex]
@InProceedings{Saeteros_2026_CVPR,
  author    = {Saeteros, Juan M. and Ar{\'e}valo, Nick J. and Vintimilla, Boris X.},
  title     = {{HAFM}: A Post-Fusion Gating Module for Haze-Aware {RGB}-Thermal Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6197--6207}
}
Inf-Dehaze: Beyond GPU Memory Constraints for Ultra-High-Resolution Image Dehazing: Xinyu Yan,

Jiuchen Chen,

Qizhi Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Xinyu and Chen, Jiuchen and Xu, Qizhi},
  title     = {{Inf-Dehaze}: Beyond {GPU} Memory Constraints for Ultra-High-Resolution Image Dehazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5086--5095}
}
SGST-Transformer: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360° Video Saliency Prediction: Kao Zhang,

Tao Song,

Zhihua Hu,

Ming Li,

Xin Ding; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kao and Song, Tao and Hu, Zhihua and Li, Ming and Ding, Xin},
  title     = {{SGST-Transformer}: A Spherical Geometry-Aware Spatio-Temporal Transformer for 360{\textdegree} Video Saliency Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2596--2605}
}
Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers: Sayan Kumar Chaki,

Thierry Fournel,

Rémi Emonet; [pdf] [supp]
[bibtex]
@InProceedings{Chaki_2026_CVPR,
  author    = {Chaki, Sayan Kumar and Fournel, Thierry and Emonet, R{\'e}mi},
  title     = {Equivariant Unsupervised Object Detection with Learnable {Riesz} Transform and Composite Spatial Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7008--7017}
}
UniLat3D: Geometry-Appearance Unified Latents for Single-Stage 3D Generation: Guanjun Wu,

Jiemin Fang,

Chen Yang,

Sikuang Li,

Taoran Yi,

Jia Lu,

Zanwei Zhou,

Jiazhong Cen,

Lingxi Xie,

Xiaopeng Zhang,

Wei Wei,

Wenyu Liu,

Xinggang Wang,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guanjun and Fang, Jiemin and Yang, Chen and Li, Sikuang and Yi, Taoran and Lu, Jia and Zhou, Zanwei and Cen, Jiazhong and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Wang, Xinggang and Tian, Qi},
  title     = {{UniLat3D}: Geometry-Appearance Unified Latents for Single-Stage {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4366--4378}
}
VibraVerse: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning: Bo Pang,

Chenxi Xu,

Jierui Ren,

Guoping Wang,

Sheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2026_CVPR,
  author    = {Pang, Bo and Xu, Chenxi and Ren, Jierui and Wang, Guoping and Li, Sheng},
  title     = {{VibraVerse}: A Large-Scale Geometry-Acoustics Alignment Dataset for Physically-Consistent Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2028--2037}
}
RoaD: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies: Guillermo Garcia-Cobo,

Maximilian Igl,

Peter Karkus,

Zhejun Zhang,

Michael Watson,

Yuxiao Chen,

Boris Ivanovic,

Marco Pavone; [pdf] [supp]
[bibtex]
@InProceedings{Garcia-Cobo_2026_CVPR,
  author    = {Garcia-Cobo, Guillermo and Igl, Maximilian and Karkus, Peter and Zhang, Zhejun and Watson, Michael and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco},
  title     = {{RoaD}: Rollouts as Demonstrations for Closed-Loop Supervised Fine-Tuning of Autonomous Driving Policies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1000--1009}
}
Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models: Xi Zhang,

Hanwei Zhu,

Jiamang Wang,

Xiaolin Wu,

Weisi Lin; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xi and Zhu, Hanwei and Wang, Jiamang and Wu, Xiaolin and Lin, Weisi},
  title     = {Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9305--9315}
}
IM-Animation: An Implicit Motion Representation for Identity-Decoupled Character Animation: Zhufeng Xu,

Xuan Gao,

Feng-Lin Liu,

Haoxian Zhang,

Zhixue Fang,

Yu-Kun Lai,

Xiaoqiang Liu,

Pengfei Wan,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhufeng and Gao, Xuan and Liu, Feng-Lin and Zhang, Haoxian and Fang, Zhixue and Lai, Yu-Kun and Liu, Xiaoqiang and Wan, Pengfei and Gao, Lin},
  title     = {{IM-Animation}: An Implicit Motion Representation for Identity-Decoupled Character Animation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4635--4646}
}
Is Your Text-to-Image Model Robust to Caption Noise?: Weichen Yu,

Ziyan Yang,

Shanchuan Lin,

Qi Zhao,

Jianyi Wang,

Liangke Gui,

Matt Fredrikson,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Weichen and Yang, Ziyan and Lin, Shanchuan and Zhao, Qi and Wang, Jianyi and Gui, Liangke and Fredrikson, Matt and Jiang, Lu},
  title     = {Is Your Text-to-Image Model Robust to Caption Noise?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3789--3798}
}
Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination: Xinzhuo Li,

Adheesh Juvekar,

Jiaxun Zhang,

Xingyou Liu,

Muntasir Wahed,

Kiet A. Nguyen,

Yifan Shen,

Tianjiao Yu,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinzhuo and Juvekar, Adheesh and Zhang, Jiaxun and Liu, Xingyou and Wahed, Muntasir and Nguyen, Kiet A. and Shen, Yifan and Yu, Tianjiao and Lourentzou, Ismini},
  title     = {Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7450--7460}
}
SyntheticManga: Training-Free Manga Generation with Phased Diffusion: Xuelei Peng,

Chi-Keung Tang,

Yu-Wing Tai; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Xuelei and Tang, Chi-Keung and Tai, Yu-Wing},
  title     = {{SyntheticManga}: Training-Free Manga Generation with Phased Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4410--4418}
}
FALCON: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation: Sankarshana Venugopal,

Mohammad Mostafavi,

Jonghyun Choi; [pdf] [supp]
[bibtex]
@InProceedings{Venugopal_2026_CVPR,
  author    = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun},
  title     = {{FALCON}: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5024--5033}
}
Self-Evolving 3D Scene Generation from a Single Image: Kaizhi Zheng,

Yue Fan,

Jing Gu,

Zishuo Xu,

Xuehai He,

Xin Eric Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Kaizhi and Fan, Yue and Gu, Jing and Xu, Zishuo and He, Xuehai and Wang, Xin Eric},
  title     = {Self-Evolving {3D} Scene Generation from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {579--590}
}
SurfelOcc: Self-supervised Occupancy Prediction via 2D Surfel Splatting: Jikai Wang,

Xingtai Gui,

Jiahao Gong,

Feiyang Tan,

Wencheng Han,

Cheng-Zhong Xu,

Jianbing Shen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jikai and Gui, Xingtai and Gong, Jiahao and Tan, Feiyang and Han, Wencheng and Xu, Cheng-Zhong and Shen, Jianbing},
  title     = {{SurfelOcc}: Self-supervised Occupancy Prediction via {2D} Surfel Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1039--1049}
}
Defending CLIP via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness: Debarshi Brahma,

Soma Biswas; [pdf] [supp]
[bibtex]
@InProceedings{Brahma_2026_CVPR,
  author    = {Brahma, Debarshi and Biswas, Soma},
  title     = {Defending {CLIP} via Noise-Induced Feature Dynamics for Training-Free, Zero-shot Adversarial Robustness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {656--665}
}
Adversarial Concept Distillation for One-Step Diffusion Personalization: Yixiong Yang,

Tao Wu,

Senmao Li,

Shiqi Yang,

Yaxing Wang,

Joost van de Weijer,

Kai Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yixiong and Wu, Tao and Li, Senmao and Yang, Shiqi and Wang, Yaxing and van de Weijer, Joost and Wang, Kai},
  title     = {Adversarial Concept Distillation for One-Step Diffusion Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4321--4333}
}
Bootstrapping Sign Language Annotations with Sign Language Models: Colin Lea,

Vasileios Baltatzis,

Connor Gillis,

Raja Kushalnagar,

Lorna Quandt,

Leah Findlater; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lea_2026_CVPR,
  author    = {Lea, Colin and Baltatzis, Vasileios and Gillis, Connor and Kushalnagar, Raja and Quandt, Lorna and Findlater, Leah},
  title     = {Bootstrapping Sign Language Annotations with Sign Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3630--3640}
}
RQR3D: Reparametrizing the regression targets for BEV-based 3D object detection: Ozsel Kilinc,

Cem Tarhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kilinc_2026_CVPR,
  author    = {Kilinc, Ozsel and Tarhan, Cem},
  title     = {{RQR3D}: Reparametrizing the regression targets for {BEV}-based {3D} object detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1159--1169}
}
MambaEye: A Size-Agnostic Visual Encoder with Causal Sequential Processing: Changho Choi,

Minho Kim,

Jinkyu Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Changho and Kim, Minho and Kim, Jinkyu},
  title     = {{MambaEye}: A Size-Agnostic Visual Encoder with Causal Sequential Processing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2659--2668}
}
MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling: Sicong Leng,

Jing Wang,

Jiaxi Li,

Hao Zhang,

Zhiqiang Hu,

Boqiang Zhang,

Yuming Jiang,

Hang Zhang,

Xin Li,

Deli Zhao,

Wei Lu,

Yu Rong,

Aixin Sun,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2026_CVPR,
  author    = {Leng, Sicong and Wang, Jing and Li, Jiaxi and Zhang, Hao and Hu, Zhiqiang and Zhang, Boqiang and Jiang, Yuming and Zhang, Hang and Li, Xin and Zhao, Deli and Lu, Wei and Rong, Yu and Sun, Aixin and Lu, Shijian},
  title     = {{MMR1}: Enhancing Multimodal Reasoning with Variance-Aware Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9075--9087}
}
Seeing Through Fog: Towards Fog-Invariant Action Recognition: Enqi Liu,

Liyuan Pan,

Zhi Gao,

Lingzhi Li,

Qing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Enqi and Pan, Liyuan and Gao, Zhi and Li, Lingzhi and Li, Qing},
  title     = {Seeing Through Fog: Towards Fog-Invariant Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6966--6975}
}
Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory: Cheng-Yen Yang,

Hsiang-Wei Huang,

Kuang-Ming Chen,

Kunjun Li,

Jenq-Neng Hwang; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Cheng-Yen and Huang, Hsiang-Wei and Chen, Kuang-Ming and Li, Kunjun and Hwang, Jenq-Neng},
  title     = {Extending {Segment Anything Model 2} to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8358--8367}
}
From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval: Yi-Xiang Zhang,

Yu-Shuen Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yi-Xiang and Wang, Yu-Shuen},
  title     = {From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9520--9530}
}
RU4D-SLAM: Reweighting Uncertainty in Gaussian Splatting SLAM for 4D Scene Reconstruction: Yangfan Zhao,

Hanwei Zhang,

Ke Huang,

Qiufeng Wang,

Zhenzhou Shao,

Dengyu Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yangfan and Zhang, Hanwei and Huang, Ke and Wang, Qiufeng and Shao, Zhenzhou and Wu, Dengyu},
  title     = {{RU4D-SLAM}: Reweighting Uncertainty in {Gaussian} Splatting {SLAM} for {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1492--1502}
}
ConfDiff: Confidence-Guided Representation Diffusion for Video Moment Retrieval: Haiming Zhao,

Tai Wang; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Haiming and Wang, Tai},
  title     = {{ConfDiff}: Confidence-Guided Representation Diffusion for Video Moment Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8164--8174}
}
PolyReal: A Benchmark for Real-World Polymer Science Workflows: Wanhao Liu,

Weida Wang,

Jiaqing Xie,

Suorong Yang,

Jue Wang,

Benteng Chen,

Guangtao Mei,

Zonglin Yang,

Shufei Zhang,

Yuchun Mo,

Lang Cheng,

Jin Zeng,

Houqiang Li,

Wanli Ouyang,

Yuqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Wanhao and Wang, Weida and Xie, Jiaqing and Yang, Suorong and Wang, Jue and Chen, Benteng and Mei, Guangtao and Yang, Zonglin and Zhang, Shufei and Mo, Yuchun and Cheng, Lang and Zeng, Jin and Li, Houqiang and Ouyang, Wanli and Li, Yuqiang},
  title     = {{PolyReal}: A Benchmark for Real-World Polymer Science Workflows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1954--1964}
}
Continual Alignment for SAM: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning: Jiayi Wang,

Wei Dai,

Haoyu Wang,

Sihan Yang,

Haixia Bi,

Jian Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayi and Dai, Wei and Wang, Haoyu and Yang, Sihan and Bi, Haixia and Sun, Jian},
  title     = {Continual Alignment for {SAM}: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7520--7529}
}
GATE: Gaussian-Attentive Transformer for Uncertainty-Aware Age Estimation: Chaewon Lee,

JunHyeok Heo,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chaewon and Heo, JunHyeok and Kim, Chang-Su},
  title     = {{GATE}: {Gaussian}-Attentive Transformer for Uncertainty-Aware Age Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8736--8745}
}
Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization: Linsi Wu,

Gang Shen,

Xuefei Lv,

Chenglong Wu,

Yuru Pei; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Linsi and Shen, Gang and Lv, Xuefei and Wu, Chenglong and Pei, Yuru},
  title     = {Spectral-Aware Adaptive Convolution for Fine-Grained Cross-View Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2669--2679}
}
On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks: Ziad Tariq Muhammad Ali,

Raja Muhammad Atif Azad,

Muhammad Ajmal Azad,

Iain Rice,

Umar Daraz,

Ali Shariq Imran,

James Holyhead; [pdf] [supp]
[bibtex]
@InProceedings{Ali_2026_CVPR,
  author    = {Ali, Ziad Tariq Muhammad and Azad, Raja Muhammad Atif and Azad, Muhammad Ajmal and Rice, Iain and Daraz, Umar and Imran, Ali Shariq and Holyhead, James},
  title     = {On Evaluating Stateful Defence Models against Query-Based Black-Box Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {809--818}
}
SafetyBPO: Bidirectional Preference Optimization for Safe Text-to-Image Generation: You Wu,

Beier Zhu,

Chi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, You and Zhu, Beier and Zhang, Chi},
  title     = {{SafetyBPO}: Bidirectional Preference Optimization for Safe Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4759--4768}
}
FastMMoE: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning: Guoyang Xia,

Yifeng Ding,

Fengfa Li,

Lei Ren,

Wei Chen,

Fangxiang Feng,

Xiaojie Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Guoyang and Ding, Yifeng and Li, Fengfa and Ren, Lei and Chen, Wei and Feng, Fangxiang and Wang, Xiaojie},
  title     = {{FastMMoE}: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5915--5924}
}
Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems: YuChe Hsu,

AnJui Wang,

TsaiChing Ni,

YuanFu Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Hsu_2026_CVPR,
  author    = {Hsu, YuChe and Wang, AnJui and Ni, TsaiChing and Yang, YuanFu},
  title     = {Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8705--8714}
}
HiDiGen: Hierarchical Diffusion for B-Rep Generation with Explicit Topological Constraints: Shurui Liu,

Weide Chen,

Ancong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shurui and Chen, Weide and Wu, Ancong},
  title     = {{HiDiGen}: Hierarchical Diffusion for {B-Rep} Generation with Explicit Topological Constraints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {538--546}
}
Vision-R1: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning: Yufei Zhan,

Yousong Zhu,

Hongyin Zhao,

Fan Yang,

Shurong Zheng,

Ming Tang,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Yufei and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Zheng, Shurong and Tang, Ming and Wang, Jinqiao},
  title     = {{Vision-R1}: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5807--5817}
}
WildAni4D: Towards 4D Animal Mesh Reconstruction: Gyeongsu Cho,

Hezhen Hu,

Donghyeon Soon,

Changwoo Kang,

Kyungdon Joo; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Gyeongsu and Hu, Hezhen and Soon, Donghyeon and Kang, Changwoo and Joo, Kyungdon},
  title     = {{WildAni4D}: Towards {4D} Animal Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {160--169}
}
MARS-RL: Enhancing Multi-Agent RAG Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning: Zhongyu Wang,

Pengbo Liu; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhongyu and Liu, Pengbo},
  title     = {{MARS-RL}: Enhancing Multi-Agent {RAG} Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9674--9683}
}
PHATE-Net: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in PET: Yixin Chen,

Yan Wang,

Wenrui Shao,

Zhaoheng Xie; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yixin and Wang, Yan and Shao, Wenrui and Xie, Zhaoheng},
  title     = {{PHATE-Net}: Differentiable Pseudotime Learning for Trustworthy Disease Trajectories in {PET}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2534--2543}
}
LenghuSky-8: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and Alt-Az Calibration for Segmentation and Nowcasting: Yicheng Rui,

Xiao-Wei Duan,

Licai Deng,

Fan Yang,

Zhengming Dang,

Zhengjun Du,

Junhao Peng,

Wenhao Chu,

Umut Mahmut,

Kexin Li,

Yiyun Wu,

Fabo Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rui_2026_CVPR,
  author    = {Rui, Yicheng and Duan, Xiao-Wei and Deng, Licai and Yang, Fan and Dang, Zhengming and Du, Zhengjun and Peng, Junhao and Chu, Wenhao and Mahmut, Umut and Li, Kexin and Wu, Yiyun and Feng, Fabo},
  title     = {{LenghuSky-8}: An 8-Year All-Sky Cloud Dataset with Star-Aware Masks and {Alt-Az} Calibration for Segmentation and Nowcasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1774--1785}
}
SCOPE: Scene-Contextualized Incremental Few-Shot 3D Segmentation: Vishal Thengane,

Zhaochong An,

Tianjin Huang,

Son Lam Phung,

Abdesselam Bouzerdoum,

Lu Yin,

Na Zhao,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thengane_2026_CVPR,
  author    = {Thengane, Vishal and An, Zhaochong and Huang, Tianjin and Phung, Son Lam and Bouzerdoum, Abdesselam and Yin, Lu and Zhao, Na and Zhu, Xiatian},
  title     = {{SCOPE}: Scene-Contextualized Incremental Few-Shot {3D} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7368--7377}
}
From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness: My H. Dinh,

Aditya Sant,

Akshay Malhotra,

Keya Patani,

Shahab Hamidi-Rad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2026_CVPR,
  author    = {Dinh, My H. and Sant, Aditya and Malhotra, Akshay and Patani, Keya and Hamidi-Rad, Shahab},
  title     = {From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7070--7079}
}
MedSAD-CLIP: Supervised CLIP with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation: Thuy Truong Tran,

Minh Kha Do,

Phuc Nguyen Duy,

Min Hun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Thuy Truong and Do, Minh Kha and Duy, Phuc Nguyen and Lee, Min Hun},
  title     = {{MedSAD-CLIP}: Supervised {CLIP} with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5168--5178}
}
Unlocking ImageNet's Multi-Object Nature: Automated Large-Scale Multilabel Annotation: Junyu Chen,

Md Yousuf Harun,

Christopher Kanan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junyu and Harun, Md Yousuf and Kanan, Christopher},
  title     = {Unlocking {ImageNet's} Multi-Object Nature: Automated Large-Scale Multilabel Annotation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2284--2293}
}
UI-AGILE: Advancing GUI Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding: Shuquan Lian,

Yuhang Wu,

Jia Ma,

Yifan Ding,

Zihan Song,

Bingqi Chen,

Xiawu Zheng,

Hui Li,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shuquan and Wu, Yuhang and Ma, Jia and Ding, Yifan and Song, Zihan and Chen, Bingqi and Zheng, Xiawu and Li, Hui and Ji, Rongrong},
  title     = {{UI-AGILE}: Advancing {GUI} Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8575--8584}
}
SCP: Spatial Causal Prediction in Video: Yanguang Zhao,

Jie Yang,

Shengqiong Wu,

Shutong Hu,

Hongbo Qiu,

Yu Wang,

Guijia Zhang,

Tan Kai Ze,

Hao Fei,

Chia-Wen Lin,

Mong-Li Lee,

Wynne Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yanguang and Yang, Jie and Wu, Shengqiong and Hu, Shutong and Qiu, Hongbo and Wang, Yu and Zhang, Guijia and Ze, Tan Kai and Fei, Hao and Lin, Chia-Wen and Lee, Mong-Li and Hsu, Wynne},
  title     = {{SCP}: Spatial Causal Prediction in Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7165--7175}
}
RefDrone: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes: Zhichao Sun,

Yepeng Liu,

Zhiling Su,

Huachao Zhu,

Yuliang Gu,

Yuda Zou,

Zelong Liu,

Gui-Song Xia,

Bo Du,

Yongchao Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhichao and Liu, Yepeng and Su, Zhiling and Zhu, Huachao and Gu, Yuliang and Zou, Yuda and Liu, Zelong and Xia, Gui-Song and Du, Bo and Xu, Yongchao},
  title     = {{RefDrone}: A Challenging Benchmark for Referring Expression Comprehension in Drone Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1752--1762}
}
CineMatte: Background Matting for Virtual Production and Beyond: Yuanjian He,

Chen Zhang,

Fasheng Chen,

Jiangbo Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuanjian and Zhang, Chen and Chen, Fasheng and Cao, Jiangbo},
  title     = {{CineMatte}: Background Matting for Virtual Production and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8725--8735}
}
ProDiG: Progressive Diffusion-Guided Gaussian Splatting for Aerial to Ground Reconstruction: Sirshapan Mitra,

Yogesh S Rawat; [pdf] [arXiv]
[bibtex]
@InProceedings{Mitra_2026_CVPR,
  author    = {Mitra, Sirshapan and Rawat, Yogesh S},
  title     = {{ProDiG}: Progressive Diffusion-Guided {Gaussian} Splatting for Aerial to Ground Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {22--32}
}
USV: Unified Sparsification for Accelerating Video Diffusion Models: Xinjian Wu,

Hongmei Wang,

Yuan Zhou,

Qinglin Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xinjian and Wang, Hongmei and Zhou, Yuan and Lu, Qinglin},
  title     = {{USV}: Unified Sparsification for Accelerating Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4180--4189}
}
DeepDP-TGMM: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations: Cyril Kana Tepakbong,

Kévin Bouchard,

Julien Maitre; [pdf] [supp]
[bibtex]
@InProceedings{Tepakbong_2026_CVPR,
  author    = {Tepakbong, Cyril Kana and Bouchard, K{\'e}vin and Maitre, Julien},
  title     = {{DeepDP-TGMM}: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7483--7492}
}
CtrlISP: Rescuing Low-Light RAW Images via Controllable Neural ISP: Chi Zhang,

Yachun Li,

Hang Du,

Shicai Yang,

Di Xie,

Jiang Zhu,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Li, Yachun and Du, Hang and Yang, Shicai and Xie, Di and Zhu, Jiang and Yang, Yang},
  title     = {{CtrlISP}: Rescuing Low-Light {RAW} Images via Controllable Neural {ISP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4888--4897}
}
Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models: Feng Chen,

Chenhui Gou,

Yefei He,

Yang Yang,

Bohan Zhuang,

Qi Wu; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Feng and Gou, Chenhui and He, Yefei and Yang, Yang and Zhuang, Bohan and Wu, Qi},
  title     = {Beyond Accuracy: An Empirical Study of Perception Stability in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3050--3059}
}
Complexity of Linear Regions in Self-supervised Deep ReLU Networks: Mufhumudzi Muthivhi,

Terence L. van Zyl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Muthivhi_2026_CVPR,
  author    = {Muthivhi, Mufhumudzi and van Zyl, Terence L.},
  title     = {Complexity of Linear Regions in Self-supervised Deep {ReLU} Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6911--6920}
}
A Diagnostic Study of Region-Based Representations in Multimodal LLMs: Ji Li,

Shengcao Cao,

Yu-Xiong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ji and Cao, Shengcao and Wang, Yu-Xiong},
  title     = {A Diagnostic Study of Region-Based Representations in Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5978--5988}
}
Towards Noise-Robust Medical Segmentation via Chebyshev-Attention-Based Asymmetric UNet: Yue Xin,

Ziyang Zheng,

Wenrui Dai,

Chenglin Li,

Junni Zou,

Hongkai Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Yue and Zheng, Ziyang and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {Towards Noise-Robust Medical Segmentation via {Chebyshev}-Attention-Based Asymmetric {UNet}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5235--5244}
}
Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks: Zhiqiu Xia,

Furong Mu,

Qi Li,

Shanshan Zhang,

Jie Gui,

Chunpeng Wang,

Yunan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zhiqiu and Mu, Furong and Li, Qi and Zhang, Shanshan and Gui, Jie and Wang, Chunpeng and Liu, Yunan},
  title     = {Watermarking Matters for Deepfake Detection: A Proactive Method for Detecting Forgeries under Conventional Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1650--1659}
}
Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity: Peicheng Zhou,

Shancheng Fang,

Chenhui Jin,

Bowei Pu,

Hongtao Xie; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Peicheng and Fang, Shancheng and Jin, Chenhui and Pu, Bowei and Xie, Hongtao},
  title     = {Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6799--6808}
}
A Simple Framework for Visual Navigation: Faith Johnson,

Bryan Bo Cao,

Shubham Jain,

Ashwin Ashok,

Kristin Dana; [pdf] [arXiv]
[bibtex]
@InProceedings{Johnson_2026_CVPR,
  author    = {Johnson, Faith and Cao, Bryan Bo and Jain, Shubham and Ashok, Ashwin and Dana, Kristin},
  title     = {A Simple Framework for Visual Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3167--3177}
}
Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models: Yiwei You,

Zan Chen,

Bo Wang,

Xiaofei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Yiwei and Chen, Zan and Wang, Bo and Zhou, Xiaofei},
  title     = {Reliable Test-time Adaptation Via Evidential Uncertainty Modeling in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2324--2334}
}
DARN: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation: Dhenenjay Yadav,

Rohan Sawai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Dhenenjay and Sawai, Rohan},
  title     = {{DARN}: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7625--7633}
}
HeartcareGPT: A Unified Multimodal ECG Suite for Dual Signal-Image Modeling and Understanding: Yihan Xie,

Sijing Li,

Zhuonan Wang,

Tianwei Lin,

Chenglin Yang,

Yu Zhong,

Wenjie Yan,

Wenqiao Zhang,

Xiaogang Guo,

Jun Xiao,

Yueting Zhuang,

Beng Chin Ooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yihan and Li, Sijing and Wang, Zhuonan and Lin, Tianwei and Yang, Chenglin and Zhong, Yu and Yan, Wenjie and Zhang, Wenqiao and Guo, Xiaogang and Xiao, Jun and Zhuang, Yueting and Ooi, Beng Chin},
  title     = {{HeartcareGPT}: A Unified Multimodal {ECG} Suite for Dual Signal-Image Modeling and Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6228--6238}
}
Rethinking VLMs for Image Forgery Detection and Localization: Shaofeng Guo,

Jiequan Cui,

Richang Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Shaofeng and Cui, Jiequan and Hong, Richang},
  title     = {Rethinking {VLMs} for Image Forgery Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5828--5837}
}
Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning: Zhilong Mao,

Hang Zhang,

Yanmin Li,

Lihua Liu,

Jibing Wu,

Mao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Zhilong and Zhang, Hang and Li, Yanmin and Liu, Lihua and Wu, Jibing and Wang, Mao},
  title     = {Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7810--7819}
}
MReactor: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement: Jiachen Luo,

Jiajun He,

Shuai Shen,

Lin Wang,

Huy Phan,

Joshua Reiss,

Lin Haijun,

Bjoern Schuller,

Zeyu Fu,

Siyang Song; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Jiachen and He, Jiajun and Shen, Shuai and Wang, Lin and Phan, Huy and Reiss, Joshua and Haijun, Lin and Schuller, Bjoern and Fu, Zeyu and Song, Siyang},
  title     = {{MReactor}: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3354--3363}
}
UniVerse3D: Emerging Properties of Unified Multimodal Models in 3D Understanding and Generation: Junliang Ye,

Zehuan Huang,

Yansong Qu,

Chunshi Wang,

Yunhan Yang,

Yang Li,

Yawei Luo,

Zhuo Chen,

Sheng Lu,

Jun Zhu,

Chunchao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Junliang and Huang, Zehuan and Qu, Yansong and Wang, Chunshi and Yang, Yunhan and Li, Yang and Luo, Yawei and Chen, Zhuo and Lu, Sheng and Zhu, Jun and Guo, Chunchao},
  title     = {{UniVerse3D}: Emerging Properties of Unified Multimodal Models in {3D} Understanding and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {613--623}
}
Region-Aware Hierarchical Sub-Feature Alignment for Robust EEG-Based Visual Decoding: Yanan Zhu,

Ziwei Xiang,

Jiamin Wu,

Jinyang Guo,

Hongyuan Zhang,

Chunfeng Song,

Hongjian Fang,

Yufei Guo,

Xianglong Liu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yanan and Xiang, Ziwei and Wu, Jiamin and Guo, Jinyang and Zhang, Hongyuan and Song, Chunfeng and Fang, Hongjian and Guo, Yufei and Liu, Xianglong},
  title     = {Region-Aware Hierarchical Sub-Feature Alignment for Robust {EEG}-Based Visual Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6667--6676}
}
STORM: End-to-End Referring Multi-Object Tracking in Videos: Zijia Lu,

Jingru Yi,

Jue Wang,

Yuxiao Chen,

Junwen Chen,

Xinyu Li,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zijia and Yi, Jingru and Wang, Jue and Chen, Yuxiao and Chen, Junwen and Li, Xinyu and Modolo, Davide},
  title     = {{STORM}: End-to-End Referring Multi-Object Tracking in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8347--8357}
}
Towards Calibrated Gradient-based Multi-Task Learning: Linxiao Cao,

Mianzimei Yang,

Zhipeng Zhou,

Hong Xie,

Defu Lian,

Menglin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Linxiao and Yang, Mianzimei and Zhou, Zhipeng and Xie, Hong and Lian, Defu and Yang, Menglin},
  title     = {Towards Calibrated Gradient-based Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5127--5136}
}
NAKUL-Med: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals: Badri N Patro,

Vijay S Agneeswaran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patro_2026_CVPR,
  author    = {Patro, Badri N and Agneeswaran, Vijay S},
  title     = {{NAKUL-Med}: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5399--5408}
}
Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning: Thinh Nguyen,

Le Huy Khiem,

Van-Tuan Tran,

Khoa D Doan,

Nitesh V. Chawla,

Kok-Seng Wong; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Thinh and Khiem, Le Huy and Tran, Van-Tuan and Doan, Khoa D and Chawla, Nitesh V. and Wong, Kok-Seng},
  title     = {Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7728--7736}
}
Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance: Jia Li,

Zhankai Li,

Yongqiang Yu,

Xuehu Yan,

Yuliang Lu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Li, Zhankai and Yu, Yongqiang and Yan, Xuehu and Lu, Yuliang},
  title     = {Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7935--7944}
}
UnrealSpace: Analyzing Spatial Understanding and Reasoning in Controllable Simulation: Wufei Ma,

Sky Cen,

Jianzhi Shen,

Rex Lee,

León Begiristain,

Yan Zhuang,

Jiawei Peng,

Zhifei Yu,

Tianao Song,

Xinyuan Qi,

Tianmin Shu,

Adam Kortylewski,

Alan Yuille; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Wufei and Cen, Sky and Shen, Jianzhi and Lee, Rex and Begiristain, Le{\'o}n and Zhuang, Yan and Peng, Jiawei and Yu, Zhifei and Song, Tianao and Qi, Xinyuan and Shu, Tianmin and Kortylewski, Adam and Yuille, Alan},
  title     = {{UnrealSpace}: Analyzing Spatial Understanding and Reasoning in Controllable Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9716--9725}
}
C3-Diff: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling: Xiaofei Wang,

Stephen J Price,

Chao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaofei and Price, Stephen J and Li, Chao},
  title     = {{C3-Diff}: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5463--5473}
}
Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday QR Codes: Ashish Kumar,

Aarthi S,

Akshay Agarwal; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashish and S, Aarthi and Agarwal, Akshay},
  title     = {Tap, Scan, Exploit: The Hidden Vulnerabilities of Everyday {QR} Codes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {857--866}
}
Beyond 3D Geometry: M3FD, a Large-Scale Dataset and Benchmark for Multimodal 3D Perceptual Understanding: Huan Hu,

Ping Chen,

Zezhou Chen,

Zhaoxiang Liu,

Zipeng Wang,

Xiang Liu,

Xin Wang,

Kai Wang,

Shiguo Lian; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Huan and Chen, Ping and Chen, Zezhou and Liu, Zhaoxiang and Wang, Zipeng and Liu, Xiang and Wang, Xin and Wang, Kai and Lian, Shiguo},
  title     = {Beyond {3D} Geometry: {M3FD}, a Large-Scale Dataset and Benchmark for Multimodal {3D} Perceptual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1986--1995}
}
CoVCR: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning: Xinyu Li,

Shiliang Sun; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinyu and Sun, Shiliang},
  title     = {{CoVCR}: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9065--9074}
}
GEODE: Geometry-Guided Discrete Diffusion for Open-Vocabulary 3D Scene Graph Generation: Changqun Feng,

Wangxiandi Yin,

Xin Hu,

Lei Zhao,

Dongyang Zhang,

Tao He; [pdf]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Changqun and Yin, Wangxiandi and Hu, Xin and Zhao, Lei and Zhang, Dongyang and He, Tao},
  title     = {{GEODE}: Geometry-Guided Discrete Diffusion for Open-Vocabulary {3D} Scene Graph Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7143--7153}
}
FedCVC: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift: Jinshan Lai,

Tingxuan Huang,

Baoyang Jiang,

Liuyu Xiang,

Qiang Ma,

Jianwei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Jinshan and Huang, Tingxuan and Jiang, Baoyang and Xiang, Liuyu and Ma, Qiang and Hu, Jianwei},
  title     = {{FedCVC}: Federated Primal-Dual Learning with Client-Driven Virtual Compensation for Mitigating Dual Drift},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2514--2523}
}
DSA: Dynamic Step Allocation for Fast Autoregressive Video Generation: Thanh-Tung Le,

Yunhan Zhao,

Menglei Chai,

Zhengyang Shen,

Zhe Cao,

Danhang Tang,

Xiaohui Xie,

Deying Kong; [pdf] [supp]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Thanh-Tung and Zhao, Yunhan and Chai, Menglei and Shen, Zhengyang and Cao, Zhe and Tang, Danhang and Xie, Xiaohui and Kong, Deying},
  title     = {{DSA}: Dynamic Step Allocation for Fast Autoregressive Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4334--4344}
}
Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras: Hodaka Kawachi,

Tomoya Nakamura,

Hiroaki Santo,

SaiKiran Kumar Tedla,

Trevor D Canham,

Yasushi Yagi,

Michael S. Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawachi_2026_CVPR,
  author    = {Kawachi, Hodaka and Nakamura, Tomoya and Santo, Hiroaki and Tedla, SaiKiran Kumar and Canham, Trevor D and Yagi, Yasushi and Brown, Michael S.},
  title     = {Towards Imperceptible Watermarking Via Environment Illumination for Consumer Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1273--1282}
}
If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions: Carlo Alberto Barbano,

Luca Molinaro,

Massimiliano Ciranni,

Emanuele Aiello,

Vito Paolo Pastore,

Marco Grangetto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barbano_2026_CVPR,
  author    = {Barbano, Carlo Alberto and Molinaro, Luca and Ciranni, Massimiliano and Aiello, Emanuele and Pastore, Vito Paolo and Grangetto, Marco},
  title     = {If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6122--6132}
}
IRDINO: Adapting DINOv3 with Second-Order Motion Awareness for Moving Infrared Small Target Detection: Qian Xu,

Shuaipeng Fan,

Fei Gao,

Mingjin Zhang; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qian and Fan, Shuaipeng and Gao, Fei and Zhang, Mingjin},
  title     = {{IRDINO}: Adapting {DINOv3} with Second-Order Motion Awareness for Moving Infrared Small Target Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8408--8418}
}
AvatarMix: Identity-Preserving Cross-Avatar Composition for Outfit Personalization: Zhaorong Wang,

Yoshihiro Kanamori,

Yuki Endo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhaorong and Kanamori, Yoshihiro and Endo, Yuki},
  title     = {{AvatarMix}: Identity-Preserving Cross-Avatar Composition for Outfit Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {425--435}
}
DeepFakeShield: A Proactive Defense Against Malicious Face Swapping: Saeed Karimi-Bidhendi,

Joseph DeGol,

Eric Wengrowski,

Dominic Roberts,

Kristin Dana; [pdf] [supp]
[bibtex]
@InProceedings{Karimi-Bidhendi_2026_CVPR,
  author    = {Karimi-Bidhendi, Saeed and DeGol, Joseph and Wengrowski, Eric and Roberts, Dominic and Dana, Kristin},
  title     = {{DeepFakeShield}: A Proactive Defense Against Malicious Face Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {867--877}
}
Multimodal Reasoning with Explicit Reasoning Patterns and Rewards: Han Qiu,

Sheng Jin,

Zhongrong Zuo,

Ziyue Wang,

Qi She,

Ling Shao,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Han and Jin, Sheng and Zuo, Zhongrong and Wang, Ziyue and She, Qi and Shao, Ling and Lu, Shijian},
  title     = {Multimodal Reasoning with Explicit Reasoning Patterns and Rewards},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9542--9551}
}
HAIT: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models: Liangjie Zhao,

Liao Wenjie,

Ming Feng,

Xiaohui Song,

Huafei Li,

Haonan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Liangjie and Wenjie, Liao and Feng, Ming and Song, Xiaohui and Li, Huafei and Lu, Haonan},
  title     = {{HAIT}: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6072--6079}
}
Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised MRI Segmentation: Ji Lin,

Bo Peng,

Suping Li,

Qianni Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Ji and Peng, Bo and Li, Suping and Zhang, Qianni},
  title     = {Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised {MRI} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5620--5631}
}
Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models: Zheyuan Gu,

Qingsong Zhao,

Yusong Wang,

Zhaohong Huang,

Xinqi Li,

Chen Yuan,

Jiawei Shao,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zheyuan and Zhao, Qingsong and Wang, Yusong and Huang, Zhaohong and Li, Xinqi and Yuan, Chen and Shao, Jiawei and Zhang, Chi and Li, Xuelong},
  title     = {Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8585--8595}
}
FREE-Switch: Frequency-Based Dynamic LoRA Switch for Style Transfer: Shenghe Zheng,

Minyu Zhang,

Tianhao Liu,

Hongzhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Shenghe and Zhang, Minyu and Liu, Tianhao and Wang, Hongzhi},
  title     = {{FREE-Switch}: Frequency-Based Dynamic {LoRA} Switch for Style Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2793--2802}
}
VEBench: Benchmarking Large Multimodal Models for Real-world Video Editing: Andong Deng,

Dawei Du,

Zhenfang Chen,

Wen Zhong,

Fan Chen,

Guang Chen,

Chia-Wen Kuo,

Longyin Wen,

Chen Chen,

Sijie Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Andong and Du, Dawei and Chen, Zhenfang and Zhong, Wen and Chen, Fan and Chen, Guang and Kuo, Chia-Wen and Wen, Longyin and Chen, Chen and Zhu, Sijie},
  title     = {{VEBench}: Benchmarking Large Multimodal Models for Real-world Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2187--2196}
}
HarmoniDiff-RS: Training-Free Diffusion Harmonization for Satellite Image Composition: Xiaoqi Zhuang,

Jefersson A Dos Santos,

Jungong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xiaoqi and Dos Santos, Jefersson A and Han, Jungong},
  title     = {{HarmoniDiff-RS}: Training-Free Diffusion Harmonization for Satellite Image Composition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6352--6360}
}
Qinling-GFFE: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting: Zhenhe Liang,

Congqi Cao,

Lanshu Hu,

Liujie Pan; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhenhe and Cao, Congqi and Hu, Lanshu and Pan, Liujie},
  title     = {{Qinling-GFFE}: A Novel Station-based Benchmark and Graph-Frequency Fusion Enhancer for Precipitation Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2554--2563}
}
Vote-in-Context: VLMs as Explainable Zero-Shot Rank Fusers: Mohamed Eltahir,

Ali Habibullah,

Lama Ayash,

Tanveer Hussain,

Naeemullah Khan; [pdf] [supp]
[bibtex]
@InProceedings{Eltahir_2026_CVPR,
  author    = {Eltahir, Mohamed and Habibullah, Ali and Ayash, Lama and Hussain, Tanveer and Khan, Naeemullah},
  title     = {Vote-in-Context: {VLMs} as Explainable Zero-Shot Rank Fusers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6496--6505}
}
Harmonized Multi-Layer Text-to-Image Generation with Generative Priors: Yusuf Dalva,

Yijun Li,

Qing Liu,

Nanxuan Zhao,

Jianming Zhang,

Zhe Lin,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dalva_2026_CVPR,
  author    = {Dalva, Yusuf and Li, Yijun and Liu, Qing and Zhao, Nanxuan and Zhang, Jianming and Lin, Zhe and Yanardag, Pinar},
  title     = {Harmonized Multi-Layer Text-to-Image Generation with Generative Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8471--8480}
}
ShelfGaussian: Shelf-Supervised Open-Vocabulary Gaussian-Based 3D Scene Understanding: Lingjun Zhao,

Yandong Luo,

James Hays,

Lu Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lingjun and Luo, Yandong and Hays, James and Gan, Lu},
  title     = {{ShelfGaussian}: Shelf-Supervised Open-Vocabulary {Gaussian}-Based {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1398--1409}
}
PEPR: Privileged Event-based Predictive Regularization for Domain Generalization: Gabriele Magrini,

Federico Becattini,

Niccolò Biondi,

Pietro Pala; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magrini_2026_CVPR,
  author    = {Magrini, Gabriele and Becattini, Federico and Biondi, Niccol{\`o} and Pala, Pietro},
  title     = {{PEPR}: Privileged Event-based Predictive Regularization for Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3209--3219}
}
PCSTracker: Long-term Scene Flow Estimation for Point Cloud Sequences: Min Lin,

Gangwei Xu,

Xianqi Wang,

Yuyi Peng,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Min and Xu, Gangwei and Wang, Xianqi and Peng, Yuyi and Yang, Xin},
  title     = {{PCSTracker}: Long-term Scene Flow Estimation for Point Cloud Sequences},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4920--4930}
}
Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models: Xinpeng Dong,

Min Zhang,

Kairong Han,

Xu Tan,

Fei Wu,

Kun Kuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Xinpeng and Zhang, Min and Han, Kairong and Tan, Xu and Wu, Fei and Kuang, Kun},
  title     = {Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6100--6110}
}
DRA: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting: Minwei Wen,

Yang Wei,

Junhao Xiao,

Xiuli Bi,

Bin Xiao; [pdf]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Minwei and Wei, Yang and Xiao, Junhao and Bi, Xiuli and Xiao, Bin},
  title     = {{DRA}: Structure-Preserving Backdoor Erasure via Diagnosing, Recalibrating, and Adapting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {779--788}
}
CLLAP: Contrastive Learning-based LiDAR-Augmented Pretraining for Enhanced Radar-Camera Fusion: Bingyi Liu,

Chuanhui Zhu,

Hongfei Xue,

Jian Teng,

Jipeng Liu,

Enshu Wang,

Penglin Dai,

Pu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Bingyi and Zhu, Chuanhui and Xue, Hongfei and Teng, Jian and Liu, Jipeng and Wang, Enshu and Dai, Penglin and Wang, Pu},
  title     = {{CLLAP}: Contrastive Learning-based {LiDAR}-Augmented Pretraining for Enhanced Radar-Camera Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {99--108}
}
100Editor: 100+ Views per Batch and Minute-Scale View-Consistent 3D Editing: Cunqi Wu,

Peng Zhou,

Jie Qin,

Qi Tian; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Cunqi and Zhou, Peng and Qin, Jie and Tian, Qi},
  title     = {{100Editor}: 100+ Views per Batch and Minute-Scale View-Consistent {3D} Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8449--8460}
}
Devil is in Narrow Policy: Unleashing Exploration in Driving VLA Models: Canyu Chen,

Yuguang Yang,

Zhewen Tan,

Yizhi Wang,

Ruiyi Zhan,

Haiyan Liu,

Xuanyao Mao,

Jason Bao,

Xinyue Tang,

Linlin Yang,

Bingchuan Sun,

Yan Wang,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Canyu and Yang, Yuguang and Tan, Zhewen and Wang, Yizhi and Zhan, Ruiyi and Liu, Haiyan and Mao, Xuanyao and Bao, Jason and Tang, Xinyue and Yang, Linlin and Sun, Bingchuan and Wang, Yan and Zhang, Baochang},
  title     = {Devil is in Narrow Policy: Unleashing Exploration in Driving {VLA} Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1062--1072}
}
APC: Transferable and Efficient Adversarial Point Counterattack for Robust 3D Point Cloud Recognition: Geunyoung Jung,

Soohong Kim,

Inseok Kong,

Jiyoung Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Geunyoung and Kim, Soohong and Kong, Inseok and Jung, Jiyoung},
  title     = {{APC}: Transferable and Efficient Adversarial Point Counterattack for Robust {3D} Point Cloud Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {789--798}
}
R2MoE: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data: Wajih Hassan Raza,

Mya Schiess,

Juan Martinez Lemus,

Timothy Michael Ellmore,

Charles Green,

Claudio Soto,

Xin Fu,

Renjie Hu; [pdf] [supp]
[bibtex]
@InProceedings{Raza_2026_CVPR,
  author    = {Raza, Wajih Hassan and Schiess, Mya and Lemus, Juan Martinez and Ellmore, Timothy Michael and Green, Charles and Soto, Claudio and Fu, Xin and Hu, Renjie},
  title     = {{R2MoE}: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5776--5785}
}
Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection: Kaustubh R Borgavi,

Sarvesh Shashikumar,

Chetan Arora; [pdf] [supp]
[bibtex]
@InProceedings{Borgavi_2026_CVPR,
  author    = {Borgavi, Kaustubh R and Shashikumar, Sarvesh and Arora, Chetan},
  title     = {Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6570--6581}
}
TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning: Varun Belagali,

Saarthak Kapse,

Pierre Marza,

Srijan Das,

Zilinghan Li,

Sofiène Boutaj,

Pushpak Pati,

Srikar Yellapragada,

Tarak Nath Nandi,

Ravi K Madduri,

Joel Saltz,

Prateek Prasanna,

Stergios Christodoulidis,

Maria Vakalopoulou,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Belagali_2026_CVPR,
  author    = {Belagali, Varun and Kapse, Saarthak and Marza, Pierre and Das, Srijan and Li, Zilinghan and Boutaj, Sofi{\`e}ne and Pati, Pushpak and Yellapragada, Srikar and Nandi, Tarak Nath and Madduri, Ravi K and Saltz, Joel and Prasanna, Prateek and Christodoulidis, Stergios and Vakalopoulou, Maria and Samaras, Dimitris},
  title     = {{TICON}: A Slide-Level Tile Contextualizer for Histopathology Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5440--5451}
}
CLASH: A Benchmark for Cross-Modal Contradiction Detection: Teodora Popordanoska,

Jiameng Li,

Matthew B. Blaschko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Popordanoska_2026_CVPR,
  author    = {Popordanoska, Teodora and Li, Jiameng and Blaschko, Matthew B.},
  title     = {{CLASH}: A Benchmark for Cross-Modal Contradiction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6051--6061}
}
Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information: Yinbo Liu,

Qi Wu,

Keyang Ye,

Xiao He,

Tian Tian; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yinbo and Wu, Qi and Ye, Keyang and He, Xiao and Tian, Tian},
  title     = {Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8492--8503}
}
GeoHOI: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection: Ziyi Xu,

Zejing Rao,

Juan Cao,

Xiaoqiang Liu,

Zhixue Fang,

Haoxian Zhang,

Songlin Tang,

Fan Tang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Ziyi and Rao, Zejing and Cao, Juan and Liu, Xiaoqiang and Fang, Zhixue and Zhang, Haoxian and Tang, Songlin and Tang, Fan},
  title     = {{GeoHOI}: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3739--3748}
}
UniD-Shift: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition: Shuai Zhang,

Zhecheng Shi,

Zhuoxiao Li,

Jing Ou,

Tengxi Wang,

Yuan Liu,

Wufan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shuai and Shi, Zhecheng and Li, Zhuoxiao and Ou, Jing and Wang, Tengxi and Liu, Yuan and Zhao, Wufan},
  title     = {{UniD-Shift}: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6382--6393}
}
Face Time Traveller : Travel Through Ages Without Losing Identity: Purbayan Kar,

Ayush Ghadiya,

Vishal Chudasama,

Pankaj Wasnik,

C.V. Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kar_2026_CVPR,
  author    = {Kar, Purbayan and Ghadiya, Ayush and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.},
  title     = {Face Time Traveller : Travel Through Ages Without Losing Identity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8756--8765}
}
Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching: Yexiong Lin,

Yu Yao,

Yang Zhou,

Tongliang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yexiong and Yao, Yu and Zhou, Yang and Liu, Tongliang},
  title     = {Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3955--3964}
}
Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation: Cong Li,

Gong Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Cong and Cheng, Gong},
  title     = {Positive Divide and Negative Discrepancy: A New Perspective on Multi-Label Logit Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3040--3049}
}
CADRNet: Cognitively-Inspired Active Vision for 3D Reasoning Segmentation via Differentiable Rendering: Zai Yang Yu,

Changshuo Wang,

Yuan Shi,

Linjun Sun,

Shu Wei,

Tingran Wang,

Wangyu Wu,

Yanjie Li,

Weijun Li; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Zai Yang and Wang, Changshuo and Shi, Yuan and Sun, Linjun and Wei, Shu and Wang, Tingran and Wu, Wangyu and Li, Yanjie and Li, Weijun},
  title     = {{CADRNet}: Cognitively-Inspired Active Vision for {3D} Reasoning Segmentation via Differentiable Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7220--7230}
}
Prompt-driven Small Object Instance Segmentation in Earth Observation: Chenhao Wang,

Yingrui Ji,

Yu Meng,

Yunjian Zhang,

Yao Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenhao and Ji, Yingrui and Meng, Yu and Zhang, Yunjian and Zhu, Yao},
  title     = {Prompt-driven Small Object Instance Segmentation in {Earth} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7347--7356}
}
Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks: Yachan Guo,

Jose Lu Gómez,

Danna Xue,

Yi Xiao,

Antonio M. López; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yachan and Lu G{\'o}mez, Jose and Xue, Danna and Xiao, Yi and L{\'o}pez, Antonio M.},
  title     = {Metric-Guided Feature Fusion of Visual Foundation Models for Segmentation Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3231--3240}
}
Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection: Zhiqiang Yang,

Renshuai Tao,

Chunjie Zhang,

Guodong Yang,

Xiaolong Zheng,

Yao Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhiqiang and Tao, Renshuai and Zhang, Chunjie and Yang, Guodong and Zheng, Xiaolong and Zhao, Yao},
  title     = {Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8090--8100}
}
AR4D: Autoregressive 4D Generation from Monocular Videos: Hanxin Zhu,

Tianyu He,

Zhibo Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Hanxin and He, Tianyu and Chen, Zhibo},
  title     = {{AR4D}: Autoregressive {4D} Generation from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {88--98}
}
HelixTrack: Event-Based Tracking and RPM Estimation of Propeller-like Objects: Radim Spetlik,

Michal Pliska,

Vojtěch Vrba,

Jiří Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spetlik_2026_CVPR,
  author    = {Spetlik, Radim and Pliska, Michal and Vrba, Vojt{\v{e}}ch and Matas, Ji{\v{r}}{\'\i}},
  title     = {{HelixTrack}: Event-Based Tracking and {RPM} Estimation of Propeller-like Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3200--3208}
}
OmniDrive-R1: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving: Zhenguo Zhang,

Haohan Zheng,

Yishen Wang,

Le Xu,

Tianchen Deng,

Xuefeng Chen,

Qu Chen,

Bo Zhang,

Wuxiong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhenguo and Zheng, Haohan and Wang, Yishen and Xu, Le and Deng, Tianchen and Chen, Xuefeng and Chen, Qu and Zhang, Bo and Huang, Wuxiong},
  title     = {{OmniDrive-R1}: Reinforcement-driven Interleaved Multi-modal Chain-of-Thought for Trustworthy Vision-Language Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1106--1116}
}
OminiMAG-SLAM : Unified Online Dual Graph Optimization for Multi-Agent Gaussian SLAM: Leqian Ding,

Caibo Li,

Yu Guo,

Fei Wang; [pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Leqian and Li, Caibo and Guo, Yu and Wang, Fei},
  title     = {{OminiMAG-SLAM} : Unified Online Dual Graph Optimization for Multi-Agent {Gaussian} {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1431--1440}
}
Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning: Yudi Shi,

Shangzhe Di,

Qirui Chen,

Qinian Wang,

Jiayin Cai,

Xiaolong Jiang,

Yao Hu,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Wang, Qinian and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi},
  title     = {Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9248--9258}
}
Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction: Ziyao Guo,

Kaipeng Zhang,

Michael Qizhe Shieh; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyao and Zhang, Kaipeng and Shieh, Michael Qizhe},
  title     = {Improving Autoregressive Image Generation Through Coarse-to-Fine Token Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1230--1239}
}
PEARL: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception: Armin Maleki,

Hayder Radha; [pdf] [supp]
[bibtex]
@InProceedings{Maleki_2026_CVPR,
  author    = {Maleki, Armin and Radha, Hayder},
  title     = {{PEARL}: A Lightweight Prompt-based Feature Interpreter Framework for Real-Time, Anonymous, and Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1138--1147}
}
Towards Efficient Multimodal Unified Reasoning Model via Model Merging: Qixiang Yin,

Huanjin Yao,

Jianghao Chen,

Jiaxing Huang,

Zhicheng Zhao,

Fei Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Qixiang and Yao, Huanjin and Chen, Jianghao and Huang, Jiaxing and Zhao, Zhicheng and Su, Fei},
  title     = {Towards Efficient Multimodal Unified Reasoning Model via Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9378--9388}
}
UNIFORM: Unifying Knowledge from Large-scale and Diverse Pre-trained Models: Yimu Wang,

Weiming Zhuang,

Chen Chen,

Jiabo Huang,

Jingtao Li,

Lingjuan Lyu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yimu and Zhuang, Weiming and Chen, Chen and Huang, Jiabo and Li, Jingtao and Lyu, Lingjuan},
  title     = {{UNIFORM}: Unifying Knowledge from Large-scale and Diverse Pre-trained Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2904--2914}
}
Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light: Ali Hassani,

Fengzhe Zhou,

Aditya Kane,

Jiannan Huang,

Chieh-Yun Chen,

Min Shi,

Steven Walton,

Markus Hoehnerbach,

Vijay Thakkar,

Mikhail Isaev,

Qinsheng Zhang,

Bing Xu,

Haicheng Wu,

Wen-mei Hwu,

Ming-Yu Liu,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassani_2026_CVPR,
  author    = {Hassani, Ali and Zhou, Fengzhe and Kane, Aditya and Huang, Jiannan and Chen, Chieh-Yun and Shi, Min and Walton, Steven and Hoehnerbach, Markus and Thakkar, Vijay and Isaev, Mikhail and Zhang, Qinsheng and Xu, Bing and Wu, Haicheng and Hwu, Wen-mei and Liu, Ming-Yu and Shi, Humphrey},
  title     = {Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3009--3018}
}
SciGA: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers: Takuro Kawada,

Shunsuke Kitada,

Sota Nemoto,

Hitoshi Iyatomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawada_2026_CVPR,
  author    = {Kawada, Takuro and Kitada, Shunsuke and Nemoto, Sota and Iyatomi, Hitoshi},
  title     = {{SciGA}: A Comprehensive Dataset for Designing Graphical Abstracts in Academic Papers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2250--2260}
}
All-Age Human Mesh Recovery: Laura Bravo-Sánchez,

Matthieu Armando,

Romain Brégier,

Grégory Rogez,

Serena Yeung-Levy,

Fabien Baradel; [pdf] [supp]
[bibtex]
@InProceedings{Bravo-Sanchez_2026_CVPR,
  author    = {Bravo-S{\'a}nchez, Laura and Armando, Matthieu and Br{\'e}gier, Romain and Rogez, Gr{\'e}gory and Yeung-Levy, Serena and Baradel, Fabien},
  title     = {All-Age Human Mesh Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3677--3687}
}
InEdit-Bench: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models: Zhiqiang Sheng,

Xumeng Han,

Zhiwei Zhang,

Zenghui Xiong,

Yifan Ding,

Aoxiang Ping,

Xiang Li,

Tong Guo,

Yao Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Zhiqiang and Han, Xumeng and Zhang, Zhiwei and Xiong, Zenghui and Ding, Yifan and Ping, Aoxiang and Li, Xiang and Guo, Tong and Mao, Yao},
  title     = {{InEdit-Bench}: Benchmarking Intermediate Logical Pathways for Intelligent Image Editing Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2176--2186}
}
LlamaRG: A Multi-View Large Language Model for Radiology Report Generation: Tanuja Jayas,

Aditya Rastogi,

Pavithra Raghavan,

Gianluca Brugnara,

Kai Schlamp,

Martha Foltyn-Dumitru,

Philipp Vollmuth; [pdf] [supp]
[bibtex]
@InProceedings{Jayas_2026_CVPR,
  author    = {Jayas, Tanuja and Rastogi, Aditya and Raghavan, Pavithra and Brugnara, Gianluca and Schlamp, Kai and Foltyn-Dumitru, Martha and Vollmuth, Philipp},
  title     = {{LlamaRG}: A Multi-View Large Language Model for Radiology Report Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5745--5754}
}
LED: LLM Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation: Yang Zhou,

Shiyu Zhao,

Yuxiao Chen,

Zhenting Wang,

Can Jin,

Mingyu Zhao,

Dimitris N. Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yang and Zhao, Shiyu and Chen, Yuxiao and Wang, Zhenting and Jin, Can and Zhao, Mingyu and Metaxas, Dimitris N.},
  title     = {{LED}: {LLM} Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9643--9653}
}
Hierarchical Textual Knowledge for Enhanced Image Clustering: Yijie Zhong,

Yunfan Gao,

Weipeng Jiang,

Haofen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Yijie and Gao, Yunfan and Jiang, Weipeng and Wang, Haofen},
  title     = {Hierarchical Textual Knowledge for Enhanced Image Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9749--9758}
}
Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack: Nanxiang Jiang,

Zhaoxin Fan,

Enhan Kang,

Daiheng Gao,

Yun Zhou,

Yanxia Chang,

Zheng Zhu,

Yeying Jin,

Wenjun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Nanxiang and Fan, Zhaoxin and Kang, Enhan and Gao, Daiheng and Zhou, Yun and Chang, Yanxia and Zhu, Zheng and Jin, Yeying and Wu, Wenjun},
  title     = {Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8080--8089}
}
RiGS: Rigid-aware 4D Gaussian Splatting from a Single Monocular Video: Chenyu Wu,

Wanhua Li,

Chen Zhu-Tian,

Hanspeter Pfister; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chenyu and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter},
  title     = {{RiGS}: Rigid-aware {4D} {Gaussian} Splatting from a Single Monocular Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {547--557}
}
Analyzing and Enhancing Visual Learning in LLM-based Radiology Report Generation: Zailong Chen,

Peng Gao,

Johan Barthelemy,

Luping Zhou,

Lei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zailong and Gao, Peng and Barthelemy, Johan and Zhou, Luping and Wang, Lei},
  title     = {Analyzing and Enhancing Visual Learning in {LLM}-based Radiology Report Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9327--9336}
}
Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection: Wenxuan Bao,

Yanjun Zhao,

Xiyuan Yang,

Jingrui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Wenxuan and Zhao, Yanjun and Yang, Xiyuan and He, Jingrui},
  title     = {Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9632--9642}
}
JetViT: Efficient High-Resolution Vision Transformer with Post-Training Attention Search: Dongyun Zou,

Zhuoyang Zhang,

Junyu Chen,

Wenkun He,

Qinhe Peng,

Hanrong Ye,

Yao Lu,

Hongxu Yin,

Yu Wang,

Song Han,

Han Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Dongyun and Zhang, Zhuoyang and Chen, Junyu and He, Wenkun and Peng, Qinhe and Ye, Hanrong and Lu, Yao and Yin, Hongxu and Wang, Yu and Han, Song and Cai, Han},
  title     = {{JetViT}: Efficient High-Resolution Vision Transformer with Post-Training Attention Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2957--2967}
}
Three-Step Conditional Diffusion 3D Reconstruction for Light-Field Microscopy: Qihong Zhao,

Shaokang Yan,

Zhimin Qiao,

Jinjia Wang,

Bo Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Qihong and Yan, Shaokang and Qiao, Zhimin and Wang, Jinjia and Xiong, Bo},
  title     = {Three-Step Conditional Diffusion {3D} Reconstruction for Light-Field Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {478--487}
}
ECOC-IL: Robust and Efficient Label LDP for Imbalanced Learning: Mengyang Li,

Ou Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengyang and Wu, Ou},
  title     = {{ECOC-IL}: Robust and Efficient Label {LDP} for Imbalanced Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7914--7923}
}
Detecting Precise Hand Touch Moments in Egocentric Video: Huy Anh Nguyen,

Feras Dayoub,

Minh Hoai; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Huy Anh and Dayoub, Feras and Hoai, Minh},
  title     = {Detecting Precise Hand Touch Moments in Egocentric Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3565--3574}
}
Teleoperation, Simulation, or Human Video? Data Utilization Law for Robot Manipulation: Chenhao Shi,

Yichen Zhu,

Junjie Wen,

Yefei Chen,

Ziang Liu,

Faming Fang; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Chenhao and Zhu, Yichen and Wen, Junjie and Chen, Yefei and Liu, Ziang and Fang, Faming},
  title     = {Teleoperation, Simulation, or Human Video? {Data} Utilization Law for Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1388--1397}
}
FedVG: Gradient-Guided Aggregation for Enhanced Federated Learning: Alina Devkota,

Jacob Thrasher,

Donald Adjeroh,

Binod Bhattarai,

Prashnna k. Gyawali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Devkota_2026_CVPR,
  author    = {Devkota, Alina and Thrasher, Jacob and Adjeroh, Donald and Bhattarai, Binod and Gyawali, Prashnna k.},
  title     = {{FedVG}: Gradient-Guided Aggregation for Enhanced Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2803--2812}
}
Open World Image Aesthetic Assessment: Mingxiang Liao,

Tianren Ma,

Xijin Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Mingxiang and Ma, Tianren and Zhang, Xijin},
  title     = {Open World Image Aesthetic Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9791--9801}
}
CETCam: Camera-Controllable Video Generation via Consistent and Extensible Tokenization: Zelin Zhao,

Xinyu Gong,

Bangya Liu,

Ziyang Song,

Jun Zhang,

Suhui Wu,

Yongxin Chen,

Hao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zelin and Gong, Xinyu and Liu, Bangya and Song, Ziyang and Zhang, Jun and Wu, Suhui and Chen, Yongxin and Zhang, Hao},
  title     = {{CETCam}: Camera-Controllable Video Generation via Consistent and Extensible Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4749--4758}
}
Rethinking Training Dynamics in Scale-Wise Autoregressive Generation: Gengze Zhou,

Chongjian Ge,

Hao Tan,

Feng Liu,

Yicong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Gengze and Ge, Chongjian and Tan, Hao and Liu, Feng and Hong, Yicong},
  title     = {Rethinking Training Dynamics in Scale-Wise Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4593--4602}
}
Scene-Level Heterogeneous Physics Simulation with 3D Gaussian Splats: Xiaoyang Liu,

Shangzhe Wu,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiaoyang and Wu, Shangzhe and Han, Kai},
  title     = {Scene-Level Heterogeneous Physics Simulation with {3D} {Gaussian} Splats},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6456--6465}
}
Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners: Yizheng Gong,

Xiaoyang Wang,

Siyue Yu,

Waleed Al-Nuaimy,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Yizheng and Wang, Xiaoyang and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin},
  title     = {Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7665--7674}
}
Asymmetric Collaborative Distillation for Asymmetric Image Retrieval: Yi Xie,

Huaidong Zhang,

Xuandi Luo,

Yan Zhou,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yi and Zhang, Huaidong and Luo, Xuandi and Zhou, Yan and He, Shengfeng},
  title     = {Asymmetric Collaborative Distillation for Asymmetric Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6706--6716}
}
iTCTSL: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning: Pan Mu,

Yuchao Zhu,

Shiqi Zhang,

Hanting Yan,

Jinglin Zhang,

Cong Bai; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2026_CVPR,
  author    = {Mu, Pan and Zhu, Yuchao and Zhang, Shiqi and Yan, Hanting and Zhang, Jinglin and Bai, Cong},
  title     = {{iTCTSL}: Interpretable Tropical Cyclone Track and Intensity Forecasting via Task Sensitive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1587--1596}
}
Native3D: End-to-End 3D Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment: Yibo Liu,

Ziwei Zhang,

Haozhou Pang,

Menghao Li,

Lanshan He,

Gan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yibo and Zhang, Ziwei and Pang, Haozhou and Li, Menghao and He, Lanshan and Qi, Gan},
  title     = {{Native3D}: End-to-End {3D} Scene Generation via Unified Mesh-Texture Modeling and Semantic Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {381--390}
}
OTPrune: Distribution-Aligned Visual Token Pruning Via Optimal Transport: Xiwen Chen,

Wenhui Zhu,

Gen Li,

Xuanzhao Dong,

Yujian Xiong,

Hao Wang,

Peijie Qiu,

Qingquan Song,

Zhipeng Wang,

Shao Tang,

Yalin Wang,

Abolfazl Razi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiwen and Zhu, Wenhui and Li, Gen and Dong, Xuanzhao and Xiong, Yujian and Wang, Hao and Qiu, Peijie and Song, Qingquan and Wang, Zhipeng and Tang, Shao and Wang, Yalin and Razi, Abolfazl},
  title     = {{OTPrune}: Distribution-Aligned Visual Token Pruning Via Optimal Transport},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5849--5859}
}
Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition: Zhuohao Chen,

Zeng Li,

Yifei Zhang,

Chang Liu,

Yu Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuohao and Li, Zeng and Zhang, Yifei and Liu, Chang and Zhou, Yu},
  title     = {Masked Next-Scale Prediction For Self-Supervised Scene Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1577--1586}
}
Consistent Video Editing as Flow-Driven Image-to-Video Generation: Ge Wang,

Songlin Fan,

Hangxu Liu,

Quanjian Song,

Hewei Wang,

Jinfeng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ge and Fan, Songlin and Liu, Hangxu and Song, Quanjian and Wang, Hewei and Xu, Jinfeng},
  title     = {Consistent Video Editing as Flow-Driven Image-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4625--4634},
}
Retrieval-VLA: Training-Free In-Context Adaptation for Vision-Language-Action Models: Yue Zhang,

Rui Wang,

Jiehong Lin,

Zhongrui Wang,

Xiaojuan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yue and Wang, Rui and Lin, Jiehong and Wang, Zhongrui and Qi, Xiaojuan},
  title     = {{Retrieval-VLA}: Training-Free In-Context Adaptation for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1358--1367},
}
SwiftVGGT: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes: Jungho Lee,

Minhyeok Lee,

Sunghun Yang,

Minseok Kang,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jungho and Lee, Minhyeok and Yang, Sunghun and Kang, Minseok and Lee, Sangyoun},
  title     = {{SwiftVGGT}: A Scalable Visual Geometry Grounded Transformer for Large-Scale Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {447--456},
}
DMin: Scalable Training Data Influence Estimation for Diffusion Models: Huawei Lin,

Yingjie Lao,

Weijie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Huawei and Lao, Yingjie and Zhao, Weijie},
  title     = {{DMin}: Scalable Training Data Influence Estimation for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3293--3302},
}
Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation: Weijian Ma,

Shizhao Sun,

Tianyu Yu,

Ruiyu Wang,

Tat-Seng Chua,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Weijian and Sun, Shizhao and Yu, Tianyu and Wang, Ruiyu and Chua, Tat-Seng and Bian, Jiang},
  title     = {Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8919--8929},
}
Adapting Large VLMs with Iterative and Manual Instructions for Generative Low-light Enhancement: Xiaoran Sun,

Liyan Wang,

Yeying Jin,

Kin-man Lam,

Zhixun Su,

Yang Yang,

Jinshan Pan,

Cong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xiaoran and Wang, Liyan and Jin, Yeying and Lam, Kin-man and Su, Zhixun and Yang, Yang and Pan, Jinshan and Wang, Cong},
  title     = {Adapting Large {VLMs} with Iterative and Manual Instructions for Generative Low-light Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4832--4842},
}
Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels: Maria Pilligua,

David Serrano-Lozano,

Pai Peng,

Ramon Baldrich,

Michael S. Brown,

Javier Vazquez-Corral; [pdf] [supp]
[bibtex]
@InProceedings{Pilligua_2026_CVPR,
  author    = {Pilligua, Maria and Serrano-Lozano, David and Peng, Pai and Baldrich, Ramon and Brown, Michael S. and Vazquez-Corral, Javier},
  title     = {Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5014--5023},
}
Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting: Inseok Jeon,

Minhyeok Lee,

Seunghoon Lee,

Minseok Kang,

Suhwan Cho,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Inseok and Lee, Minhyeok and Lee, Seunghoon and Kang, Minseok and Cho, Suhwan and Lee, Sangyoun},
  title     = {Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4266--4275},
}
ForenDeX: Unlocking Forensic Insights for Explainable AI-Generated Image Detection: Chuangchuang Tan,

Jinglu Wang,

Xiang Ming,

Renshuai Tao,

Yunchao Wei,

Yao Zhao,

Yan Lu; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Chuangchuang and Wang, Jinglu and Ming, Xiang and Tao, Renshuai and Wei, Yunchao and Zhao, Yao and Lu, Yan},
  title     = {{ForenDeX}: Unlocking Forensic Insights for Explainable {AI-Generated} Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6592--6601},
}
Gaze into the Details: Locality-Sensitive Enhancement for OCTA Retinal Vessel Segmentation: Tuopusen Huang,

Ding Ma,

Xiangqian Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Tuopusen and Ma, Ding and Wu, Xiangqian},
  title     = {Gaze into the Details: Locality-Sensitive Enhancement for {OCTA} Retinal Vessel Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5409--5418},
}
Affine Bases for Affine Spaces: Gabriel Dogadov,

Marc Alexa; [pdf] [supp]
[bibtex]
@InProceedings{Dogadov_2026_CVPR,
  author    = {Dogadov, Gabriel and Alexa, Marc},
  title     = {Affine Bases for Affine Spaces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {213--222},
}
DELRER: Disease Evolution-Informed Longitudinal Radiology Report Generation: Kaiyu Wang,

Bing Wang,

Changchun Li,

You Lu,

Yaning Wang,

Huimao Zhang,

Ximing Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kaiyu and Wang, Bing and Li, Changchun and Lu, You and Wang, Yaning and Zhang, Huimao and Li, Ximing},
  title     = {{DELRER}: Disease Evolution-Informed Longitudinal Radiology Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5357--5367},
}
A2Z-10M+: Geometric Deep Learning with A-to-Z BRep Annotations for AI-Assisted CAD Modeling and Reverse Engineering: Pritham K Jena,

Bhavika Baburaj,

Tushar Anand,

Vedant Dutta,

Vineeth Ulavala,

Sk Aziz Ali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jena_2026_CVPR,
  author    = {Jena, Pritham K and Baburaj, Bhavika and Anand, Tushar and Dutta, Vedant and Ulavala, Vineeth and Ali, Sk Aziz},
  title     = {{A2Z-10M+}: Geometric Deep Learning with {A-to-Z} {BRep} Annotations for {AI-Assisted} {CAD} Modeling and Reverse Engineering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1913--1923},
}
CoRT-Predictor: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving: Yanlin Jiang,

Yuchen Liu,

Mingren Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yanlin and Liu, Yuchen and Liu, Mingren},
  title     = {{CoRT-Predictor}: Chain of Risk Thought Autoregressive Trajectory Predictor for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1117--1127},
}
THOM: Generating Physically Plausible Hand-Object Meshes From Text: Uyoung Jeong,

Yihalem Yimolal Tiruneh,

Hyung Jin Chang,

Seungryul Baek,

Kwang In Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Uyoung and Tiruneh, Yihalem Yimolal and Chang, Hyung Jin and Baek, Seungryul and Kim, Kwang In},
  title     = {{THOM}: Generating Physically Plausible Hand-Object Meshes From Text},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3653--3664},
}
Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models: Abin Shoby,

Ta Duc Huy,

Tuan Dung Nguyen,

Minh Khoi Ho,

Qi Chen,

Anton van den Hengel,

Phi Le Nguyen,

Johan W. Verjans,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shoby_2026_CVPR,
  author    = {Shoby, Abin and Huy, Ta Duc and Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and van den Hengel, Anton and Nguyen, Phi Le and Verjans, Johan W. and Phan, Vu Minh Hieu},
  title     = {Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9185--9194},
}
A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation: Xinpan Yuan,

Mingzhu Huang,

Liujie Hua,

Jianuo Ju,

Xiaowei Zhao,

Lin Yuanbo Wu; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Xinpan and Huang, Mingzhu and Hua, Liujie and Ju, Jianuo and Zhao, Xiaowei and Wu, Lin Yuanbo},
  title     = {A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5545--5555},
}
Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios: Zhongzhen Huang,

Linjie Mu,

Yannian Gu,

Kangzhe Hu,

Shengyi Hua,

Xiaofan Zhang; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhongzhen and Mu, Linjie and Gu, Yannian and Hu, Kangzhe and Hua, Shengyi and Zhang, Xiaofan},
  title     = {Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5609--5619},
}
The DeepSpeak Dataset: Sarah Barrington,

Maty Bohacek,

Hany Farid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barrington_2026_CVPR,
  author    = {Barrington, Sarah and Bohacek, Maty and Farid, Hany},
  title     = {The {DeepSpeak} Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1893--1902},
}
PRADA: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images: Simon Damm,

Jonas Ricker,

Henning Petzka,

Asja Fischer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Damm_2026_CVPR,
  author    = {Damm, Simon and Ricker, Jonas and Petzka, Henning and Fischer, Asja},
  title     = {{PRADA}: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6506--6516},
}
ELSA: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers: Chih-Chung Hsu,

Xin-Di Ma,

Wo-Ting Liao,

Chia-Ming Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsu_2026_CVPR,
  author    = {Hsu, Chih-Chung and Ma, Xin-Di and Liao, Wo-Ting and Lee, Chia-Ming},
  title     = {{ELSA}: Exact Linear-Scan Attention for Fast and Memory-Light Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2988--2997},
}
Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis: Yan Zhang,

Kun Liu,

Min Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yan and Liu, Kun and Li, Min},
  title     = {Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5505--5514},
}
RealDiffusion: Physics-informed Attention for Multi-character Storybook Generation: Qi Zhao,

Jun Chen,

Ivor Tsang,

Guang Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Qi and Chen, Jun and Tsang, Ivor and Dai, Guang},
  title     = {{RealDiffusion}: Physics-informed Attention for Multi-character Storybook Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4698--4707},
}
BLEG: LLM Functions as Powerful fMRI Graph-Enhancer for Brain Network Analysis: Rui Dong,

Zitong Wang,

Jiaxing Li,

Weihuang Zheng,

Youyong Kong; [pdf] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Rui and Wang, Zitong and Li, Jiaxing and Zheng, Weihuang and Kong, Youyong},
  title     = {{BLEG}: {LLM} Functions as Powerful {fMRI} Graph-Enhancer for Brain Network Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5662--5672},
}
Self-Guided Integrated Gradient Method for Attribution: Sabrina Henry,

Alice Ruget,

Stirling Scholes,

Jonathan Leach; [pdf] [supp]
[bibtex]
@InProceedings{Henry_2026_CVPR,
  author    = {Henry, Sabrina and Ruget, Alice and Scholes, Stirling and Leach, Jonathan},
  title     = {Self-Guided Integrated Gradient Method for Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3312--3321},
}
VHOI: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification: Wanyue Zhang,

Lin Geng Foo,

Thabo Beeler,

Rishabh Dabral,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wanyue and Foo, Lin Geng and Beeler, Thabo and Dabral, Rishabh and Theobalt, Christian},
  title     = {{VHOI}: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4009--4021},
}
When Interpretability Becomes a Liability: Adversarial Attacks on CBM Concept Layers: Aditya Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sridhar_2026_CVPR,
  author    = {Sridhar, Aditya},
  title     = {When Interpretability Becomes a Liability: Adversarial Attacks on {CBM} Concept Layers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {829--836},
}
Large Multimodal Models as General In-Context Classifiers: Marco Garosi,

Matteo Farina,

Alessandro Conti,

Massimiliano Mancini,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garosi_2026_CVPR,
  author    = {Garosi, Marco and Farina, Matteo and Conti, Alessandro and Mancini, Massimiliano and Ricci, Elisa},
  title     = {Large Multimodal Models as General In-Context Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6727--6736},
}
UnfoldIR: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration: Chunming He,

Rihan Zhang,

Fengyang Xiao,

Chengyu Fang,

Longxiang Tang,

Rui Zhang,

Sina Farsiu; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Chunming and Zhang, Rihan and Xiao, Fengyang and Fang, Chengyu and Tang, Longxiang and Zhang, Rui and Farsiu, Sina},
  title     = {{UnfoldIR}: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5003--5013},
}
Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors: Vishal Purohit,

Wei Chen,

Qiang Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Purohit_2026_CVPR,
  author    = {Purohit, Vishal and Chen, Wei and Qiu, Qiang},
  title     = {Blockwise Divide-and-Aggregate for Image Restoration using Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1263--1272},
}
UniTalking: A Unified Audio-Video Framework for Talking Portrait Generation: Hebeizi Li,

Benyuan Sun,

Yi Yang,

Zihao Liang,

Zihao Yin,

Xiao Sha,

Chenliang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hebeizi and Sun, Benyuan and Yang, Yi and Liang, Zihao and Yin, Zihao and Sha, Xiao and Wang, Chenliang},
  title     = {{UniTalking}: A Unified Audio-Video Framework for Talking Portrait Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4647--4656},
}
CADReasoner: Iterative Program Editing for CAD Reverse Engineering: Soslan Kabisov,

Vsevolod Kirichuk,

Andrey Volkov,

Marina Barannikov,

Gennadiy Savrasov,

Anton Konushin,

Andrey Kuznetsov,

Dmitrii Zhemchuzhnikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kabisov_2026_CVPR,
  author    = {Kabisov, Soslan and Kirichuk, Vsevolod and Volkov, Andrey and Barannikov, Marina and Savrasov, Gennadiy and Konushin, Anton and Kuznetsov, Andrey and Zhemchuzhnikov, Dmitrii},
  title     = {{CADReasoner}: Iterative Program Editing for {CAD} Reverse Engineering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6143--6153},
}
MoVieDrive: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer: Guile Wu,

David Huang,

Dongfeng Bai,

Bingbing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guile and Huang, David and Bai, Dongfeng and Liu, Bingbing},
  title     = {{MoVieDrive}: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4290--4299},
}
S^2DiT: Sandwich Diffusion Transformer for Mobile Streaming Video Generation: Lin Zhao,

Yushu Wu,

Aleksei Lebedev,

Dishani Lahiri,

Meng Dong,

Arpit Sahni,

Michael Vasilkovsky,

Hao Chen,

Ju Hu,

Aliaksandr Siarohin,

Sergey Tulyakov,

Yanzhi Wang,

Anil Kag,

Yanyu Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lin and Wu, Yushu and Lebedev, Aleksei and Lahiri, Dishani and Dong, Meng and Sahni, Arpit and Vasilkovsky, Michael and Chen, Hao and Hu, Ju and Siarohin, Aliaksandr and Tulyakov, Sergey and Wang, Yanzhi and Kag, Anil and Li, Yanyu},
  title     = {{S$^2$DiT}: Sandwich Diffusion Transformer for Mobile Streaming Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4355--4365},
}
Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving: Xuepei Yang,

Mingtao Feng,

Weisheng Dong,

Lin Chen,

Jie Feng,

Fangfang Wu,

Yufan Zhu,

Ajmal Saeed Mian; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xuepei and Feng, Mingtao and Dong, Weisheng and Chen, Lin and Feng, Jie and Wu, Fangfang and Zhu, Yufan and Mian, Ajmal Saeed},
  title     = {Physics-Informed Reward Framework for Vision-Language Driven Safe Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {942--951},
}
VeCoR -- Velocity Contrastive Regularization for Flow Matching: Zong-Wei Hong,

Jing-Lun Li,

Lin-Ze Li,

Shen Zhang,

Yao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Zong-Wei and Li, Jing-Lun and Li, Lin-Ze and Zhang, Shen and Tang, Yao},
  title     = {{VeCoR} -- Velocity Contrastive Regularization for Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4739--4748},
}
Can Language Models Understand mmWave Data? Benchmarking Large Language Models for mmWave Radar-Based Human Understanding: Jeongwan Shin,

Jaehyeon Kim,

Donguk Ko,

Jaeho Choi; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Jeongwan and Kim, Jaehyeon and Ko, Donguk and Choi, Jaeho},
  title     = {Can Language Models Understand {mmWave} Data? Benchmarking Large Language Models for {mmWave} Radar-Based Human Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2208--2219},
}
FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views: Chaoyi Zhou,

Run Wang,

Feng Luo,

Mert D. Pesé,

Zhiwen Fan,

Yiqi Zhong,

Siyu Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Chaoyi and Wang, Run and Luo, Feng and Pes{\'e}, Mert D. and Fan, Zhiwen and Zhong, Yiqi and Huang, Siyu},
  title     = {{FF3R}: Feedforward Feature {3D} Reconstruction from Unconstrained views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {129--138},
}
HorizonWeaver: Generalizable Multi-Level Semantic Editing for Driving Scenes: Mauricio Soroco,

Francesco Pittaluga,

Zaid Tasneem,

Abhishek Aich,

Bingbing Zhuang,

Wuyang Chen,

Manmohan Chandraker,

Ziyu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soroco_2026_CVPR,
  author    = {Soroco, Mauricio and Pittaluga, Francesco and Tasneem, Zaid and Aich, Abhishek and Zhuang, Bingbing and Chen, Wuyang and Chandraker, Manmohan and Jiang, Ziyu},
  title     = {{HorizonWeaver}: Generalizable Multi-Level Semantic Editing for Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {952--959},
}
Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations: Yiqing Shen,

Chenjia Li,

Mathias Unberath; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Yiqing and Li, Chenjia and Unberath, Mathias},
  title     = {Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3945--3954},
}
GReD-RSITR: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval: Shuhuai Wang,

Songwei Pei,

Bingfeng Liu,

Yuanzhou Huang,

Qian Li,

Shangguang Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shuhuai and Pei, Songwei and Liu, Bingfeng and Huang, Yuanzhou and Li, Qian and Wang, Shangguang},
  title     = {{GReD-RSITR}: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6312--6321},
}
Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning: Jiyang Xu,

Rui Liu,

Hang Dai; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiyang and Liu, Rui and Dai, Hang},
  title     = {Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6612--6621},
}
OminPSD: Layered PSD Generation with Diffusion Transformer: Cheng Liu,

Yiren Song,

Haofan Wang,

Mike Zheng Shou; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Cheng and Song, Yiren and Wang, Haofan and Shou, Mike Zheng},
  title     = {{OminPSD}: Layered {PSD} Generation with Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4190--4201},
}
CrowdVerse: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation: Pingrui Lai,

Yanshan Zhou,

Zihao Xie,

Hua Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Pingrui and Zhou, Yanshan and Xie, Zihao and Yang, Hua},
  title     = {{CrowdVerse}: A Bidirectional Reality-Calibrated Benchmark for Crowd Understanding and Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2197--2207},
}
Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification: Shihao Li,

Huaibo Huang,

Aihua Zheng,

Jin Tang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shihao and Huang, Huaibo and Zheng, Aihua and Tang, Jin and He, Ran},
  title     = {Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6560--6569},
}
Less is More: Multimodal Human Pose Estimation with Selective Fusion: Yutong Xu,

Qianyi Huang,

Xu Chen; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yutong and Huang, Qianyi and Chen, Xu},
  title     = {Less is More: Multimodal Human Pose Estimation with Selective Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3575--3584},
}
DenoiseGS: Gaussian Reconstruction Model for Burst Denoising: Yongsen Cheng,

Yuanhao Cai,

Yulun Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yongsen and Cai, Yuanhao and Zhang, Yulun},
  title     = {{DenoiseGS}: {Gaussian} Reconstruction Model for Burst Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5096--5105},
}
Video4Spatial: Towards Visuospatial Intelligence with Context-Guided Video Generation: Zeqi Xiao,

Yiwei Zhao,

Lingxiao Li,

Yushi Lan,

Ning Yu,

Rahul Garg,

Mohammad H. Taghavi,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Zeqi and Zhao, Yiwei and Li, Lingxiao and Lan, Yushi and Yu, Ning and Garg, Rahul and Taghavi, Mohammad H. and Pan, Xingang},
  title     = {{Video4Spatial}: Towards Visuospatial Intelligence with Context-Guided Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3933--3944},
}
Optical Tolerance-Compensated Diffusion Model for Image Restoration: Hongji Dong,

Huihui Gong,

Tanli Zuo,

Yu Zhao,

Jin Dai,

Jingduo Tian,

Kai Ni; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Hongji and Gong, Huihui and Zuo, Tanli and Zhao, Yu and Dai, Jin and Tian, Jingduo and Ni, Kai},
  title     = {Optical Tolerance-Compensated Diffusion Model for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5064--5074},
}
EI: Early Intervention for Multimodal Imaging Based Disease Recognition: Qijie Wei,

HaiLan Lin,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Qijie and Lin, HaiLan and Li, Xirong},
  title     = {{EI}: Early Intervention for Multimodal Imaging Based Disease Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5632--5640},
}
Switch-JustDance: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game: Jeonghwan Kim,

Wontaek Kim,

Yidan Lu,

Jin Cheng,

Fatemeh Zargarbashi,

Zicheng Zeng,

Zekun Qi,

Zhiyang Dou,

Nitish Sontakke,

Donghoon Baek,

Li Yi,

Sehoon Ha,

Tianyu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jeonghwan and Kim, Wontaek and Lu, Yidan and Cheng, Jin and Zargarbashi, Fatemeh and Zeng, Zicheng and Qi, Zekun and Dou, Zhiyang and Sontakke, Nitish and Baek, Donghoon and Yi, Li and Ha, Sehoon and Li, Tianyu},
  title     = {{Switch-JustDance}: Benchmarking Whole-Body Motion Tracking Controllers Using a Commercial Console Game},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1421--1430},
}
Visual Reasoning Through Tool-Supervised Reinforcement Learning: Qihua Dong,

Gozde Sahin,

Pei Wang,

Zhaowei Cai,

Robik Shrestha,

Hao Yang,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qihua and Sahin, Gozde and Wang, Pei and Cai, Zhaowei and Shrestha, Robik and Yang, Hao and Modolo, Davide},
  title     = {Visual Reasoning Through Tool-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8993--9002},
}
Cross-Resolution Diffusion Models Via Network Pruning: Jiaxuan Ren,

Junhan Zhu,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Jiaxuan and Zhu, Junhan and Wang, Huan},
  title     = {Cross-Resolution Diffusion Models Via Network Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4224--4233},
}
GR-Diffusion: Graph-Guided Relational-Aware Diffusion via Attention Alignment: Xiaochen Liu,

Xiaoting Xi,

Chao Yin,

Xiaoqiang Li,

Daoguo Dong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiaochen and Xi, Xiaoting and Yin, Chao and Li, Xiaoqiang and Dong, Daoguo},
  title     = {{GR-Diffusion}: Graph-Guided Relational-Aware Diffusion via Attention Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3759--3768},
}
DM^3T: Harmonizing Modalities via Diffusion for Multi-Object Tracking: Weiran Li,

Yeqiang Liu,

Yijie Wei,

Mina Han,

Qiannan Guo,

Zhenbo Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiran and Liu, Yeqiang and Wei, Yijie and Han, Mina and Guo, Qiannan and Li, Zhenbo},
  title     = {{DM$^3$T}: Harmonizing Modalities via Diffusion for Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8398--8407},
}
When Harmful Content Goes Invisible: Unveiling Perception Failure of LVLMs with CAMOUHARMTI: Yanhui Li,

Qi Zhou,

Zhihong Xu,

Huizhong Guo,

Wenhai Wang,

Dongxia Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanhui and Zhou, Qi and Xu, Zhihong and Guo, Huizhong and Wang, Wenhai and Wang, Dongxia},
  title     = {When Harmful Content Goes Invisible: Unveiling Perception Failure of {LVLMs} with {CAMOUHARMTI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2038--2048}
}
One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation: Yuan Gao,

Chen Chen,

Jiatao Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuan and Chen, Chen and Gu, Jiatao},
  title     = {One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4688--4697}
}
Memory-efficient Continual Learning with Prototypical Exemplar Condensation: M.-Duong Nguyen,

Thien-Thanh Dao,

Le-Tuan Nguyen,

Dung D. Le,

Kok-Seng Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, M.-Duong and Dao, Thien-Thanh and Nguyen, Le-Tuan and Le, Dung D. and Wong, Kok-Seng},
  title     = {Memory-efficient Continual Learning with Prototypical Exemplar Condensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7675--7685}
}
Eevee: Towards Close-up High-resolution Video-based Virtual Try-on: Jianhao Zeng,

Yancheng Bai,

Ruidong Chen,

Xuanpu Zhang,

Lei Sun,

Dongyang Jin,

Ryan Xu,

Nannan Zhang,

Dan Song,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Jianhao and Bai, Yancheng and Chen, Ruidong and Zhang, Xuanpu and Sun, Lei and Jin, Dongyang and Xu, Ryan and Zhang, Nannan and Song, Dan and Chu, Xiangxiang},
  title     = {Eevee: Towards Close-up High-resolution Video-based Virtual Try-on},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4614--4624}
}
Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA: Yujie Wang,

Hu Zhang,

Jiye Liang,

Zhiqiang Wang,

Hongye Tan,

Ru Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yujie and Zhang, Hu and Liang, Jiye and Wang, Zhiqiang and Tan, Hongye and Li, Ru},
  title     = {Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented {VQA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9282--9293}
}
See Tomorrow, Act Today: Foresight-Driven Autonomous Driving: Bozhou Zhang,

Nan Song,

Yuang Wang,

Jiankang Deng,

Xiatian Zhu,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bozhou and Song, Nan and Wang, Yuang and Deng, Jiankang and Zhu, Xiatian and Zhang, Li},
  title     = {See Tomorrow, Act Today: Foresight-Driven Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1180--1190}
}
CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking: Zhenyu Wu,

Tengfei Shi,

Xuehao Wang,

Ming Li,

Chenglizhao Chen,

Wenfeng Song,

Aimin Hao; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhenyu and Shi, Tengfei and Wang, Xuehao and Li, Ming and Chen, Chenglizhao and Song, Wenfeng and Hao, Aimin},
  title     = {{CogNet}: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7241--7250}
}
NumeriKontrol: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing: Zhenyu Xu,

Xiaoqi Shen,

Haotian Nan,

Xinyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhenyu and Shen, Xiaoqi and Nan, Haotian and Zhang, Xinyu},
  title     = {{NumeriKontrol}: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4389--4399}
}
Direct Language Embedding Enables Gaussian Splatting for Large Scenes: Zhida Li,

Jianqiao Zhu,

Hejin Huang,

Yipeng Qin,

Sibei Yang,

Guanbin Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhida and Zhu, Jianqiao and Huang, Hejin and Qin, Yipeng and Yang, Sibei and Li, Guanbin},
  title     = {Direct Language Embedding Enables {Gaussian} Splatting for Large Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7231--7240}
}
GOVTrack: Towards Generative Open-Vocabulary Multi-Object Tracking: Zekun Qian,

Ruize Han,

Zhixiang Wang,

Liang Wan,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Wan, Liang and Feng, Wei},
  title     = {{GOVTrack}: Towards Generative Open-Vocabulary Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1872--1882}
}
AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning: Jianheng Tang,

Jingyu He,

Kejia Fan,

Run He,

Jingchao Wang,

Anfeng Liu,

Houbing Herbert Song,

Leye Wang,

Zhanxing Zhu,

Huiping Zhuang,

Yunhuai Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jianheng and He, Jingyu and Fan, Kejia and He, Run and Wang, Jingchao and Liu, Anfeng and Song, Houbing Herbert and Wang, Leye and Zhu, Zhanxing and Zhuang, Huiping and Liu, Yunhuai},
  title     = {{AFCL}: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7768--7778}
}
DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering: Guillermo Figueroa Araneda,

Iris Dania Jimenez,

Florian Hofherr,

Manny Ko,

Hector Andrade-Loarca,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Araneda_2026_CVPR,
  author    = {Araneda, Guillermo Figueroa and Jimenez, Iris Dania and Hofherr, Florian and Ko, Manny and Andrade-Loarca, Hector and Cremers, Daniel},
  title     = {{DIAMOND-SSS}: Diffusion-Augmented Multi-View Optimization for Data-efficient {SubSurface} Scattering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8461--8470}
}
AceMIL: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis: Shijie Li,

Yiming Chen,

Yingyun Gong,

Hongwen Zhou,

Feng-Jung Chen,

Xieping Gao,

Zhineng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shijie and Chen, Yiming and Gong, Yingyun and Zhou, Hongwen and Chen, Feng-Jung and Gao, Xieping and Chen, Zhineng},
  title     = {{AceMIL}: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5336--5346}
}
SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery: Meng Cao,

Xingyu Li,

Xue Liu,

Ian Reid,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Meng and Li, Xingyu and Liu, Xue and Reid, Ian and Liang, Xiaodan},
  title     = {{SpatialDreamer}: Incentivizing Spatial Reasoning via Active Mental Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7176--7187}
}
Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain MRI: Mingjie Li,

Edward Kim,

Yue Zhao,

Ehsan Adeli,

Kilian M. Pohl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingjie and Kim, Edward and Zhao, Yue and Adeli, Ehsan and Pohl, Kilian M.},
  title     = {Modality-Aware and Anatomical Vector-Quantized Autoencoding for Multimodal Brain {MRI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1211--1220}
}
One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition: Balaji Darur,

Amanmeet Garg,

Makarand Tapaswi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Darur_2026_CVPR,
  author    = {Darur, Balaji and Garg, Amanmeet and Tapaswi, Makarand},
  title     = {One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8268--8279}
}
DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer: Soichiro Okazaki,

Tatsuya Sasaki,

Hiroki Ohashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Okazaki_2026_CVPR,
  author    = {Okazaki, Soichiro and Sasaki, Tatsuya and Ohashi, Hiroki},
  title     = {{DetRefiner}: Model-Agnostic Detection Refinement with Feature Fusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6890--6900}
}
EggHand: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting: Jaeyoung Choi,

Hyeondong Kim,

Yujin Kim,

Daehee Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeyoung and Kim, Hyeondong and Kim, Yujin and Park, Daehee},
  title     = {{EggHand}: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3521--3531}
}
RoadTones: Tone Controllable Text Generation from Road Event Videos: Chirag Parikh,

Siddhi Pravin Lipare,

Ravi Kiran Sarvadevabhatla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parikh_2026_CVPR,
  author    = {Parikh, Chirag and Lipare, Siddhi Pravin and Sarvadevabhatla, Ravi Kiran},
  title     = {{RoadTones}: Tone Controllable Text Generation from Road Event Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1019--1028}
}
SciPostGen: Bridging the Gap between Scientific Papers and Poster Layouts: Shun Inadumi,

Shohei Tanaka,

Tosho Hirasawa,

Atsushi Hashimoto,

Koichiro Yoshino,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Inadumi_2026_CVPR,
  author    = {Inadumi, Shun and Tanaka, Shohei and Hirasawa, Tosho and Hashimoto, Atsushi and Yoshino, Koichiro and Ushiku, Yoshitaka},
  title     = {{SciPostGen}: Bridging the Gap between Scientific Papers and Poster Layouts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2131--2141}
}
DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition: Yang Yang,

Kai Xu,

Junyao Hou,

Miao Zhang,

Xiang Li,

Zhenghua Chen,

Yingxue Gao,

Min Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yang and Xu, Kai and Hou, Junyao and Zhang, Miao and Li, Xiang and Chen, Zhenghua and Gao, Yingxue and Wu, Min},
  title     = {{DEED}: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7789--7798}
}
SemanticMoments: Training-Free Motion Similarity via Third Moment Features: Saar Huberman,

Kfir Goldberg,

Or Patashnik,

Sagie Benaim,

Ron Mokady; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huberman_2026_CVPR,
  author    = {Huberman, Saar and Goldberg, Kfir and Patashnik, Or and Benaim, Sagie and Mokady, Ron},
  title     = {{SemanticMoments}: Training-Free Motion Similarity via Third Moment Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8419--8428}
}
Long-LRM++: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction: Chen Ziwen,

Hao Tan,

Peng Wang,

Zexiang Xu,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziwen_2026_CVPR,
  author    = {Ziwen, Chen and Tan, Hao and Wang, Peng and Xu, Zexiang and Fuxin, Li},
  title     = {{Long-LRM++}: Preserving Fine Details in Feed-Forward Wide-Coverage Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {370--380}
}
VisionCreator: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation: Jinxiang Lai,

Zexin Lu,

Jiajun He,

Rongwei Quan,

Wenzhe Zhao,

Qinyu Yang,

Qi Chen,

Qin Lin,

Chuyue Li,

Tao Gao,

Yuhao Shan,

Song Guo,

Qinglin Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Jinxiang and Lu, Zexin and He, Jiajun and Quan, Rongwei and Zhao, Wenzhe and Yang, Qinyu and Chen, Qi and Lin, Qin and Li, Chuyue and Gao, Tao and Shan, Yuhao and Guo, Song and Lu, Qinglin},
  title     = {{VisionCreator}: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4140--4149}
}
PEdit: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control: Sooyeon Park,

Jaeil Park,

Sung-Bae Cho; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Sooyeon and Park, Jaeil and Cho, Sung-Bae},
  title     = {{PEdit}: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4800--4809}
}
QDM: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution: Donglin Yang,

Paul Vicol,

Xiaojuan Qi,

Renjie Liao,

Xiaofan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Donglin and Vicol, Paul and Qi, Xiaojuan and Liao, Renjie and Zhang, Xiaofan},
  title     = {{QDM}: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5044--5053}
}
Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation: Zhihua Xie,

Haolin Chang,

Guohua Miao,

Jianing Chen; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Zhihua and Chang, Haolin and Miao, Guohua and Chen, Jianing},
  title     = {Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3428--3437}
}
Hi3Doc: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding: Wanying Zhou,

Zhuo Chen,

Jianzhi Lu,

Chenxi Ma,

Weimin Tan,

Bo Yan; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Wanying and Chen, Zhuo and Lu, Jianzhi and Ma, Chenxi and Tan, Weimin and Yan, Bo},
  title     = {{Hi3Doc}: Hierarchical Tri-Level Representations for Multimodal Long-Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2721--2730}
}
Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations: Toshimichi Aota,

Akinori Hashimoto,

Naoto Sekizuka,

Takayuki Okatani; [pdf] [supp]
[bibtex]
@InProceedings{Aota_2026_CVPR,
  author    = {Aota, Toshimichi and Hashimoto, Akinori and Sekizuka, Naoto and Okatani, Takayuki},
  title     = {Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6932--6942}
}
SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification: Jun Wei Hsieh,

Ying-Hsuan Wu,

Yi-Kuan Hsieh,

Xin Li,

Kuan-Chuan Peng,

Ming-Ching Chang; [pdf]
[bibtex]
@InProceedings{Hsieh_2026_CVPR,
  author    = {Hsieh, Jun Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching},
  title     = {{SoREL}: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6839--6848}
}
PSIM: Perceptual Similarity Index Measure: Md Eimran Hossain Eimon,

Hari Kalva; [pdf] [supp]
[bibtex]
@InProceedings{Eimon_2026_CVPR,
  author    = {Eimon, Md Eimran Hossain and Kalva, Hari},
  title     = {{PSIM}: Perceptual Similarity Index Measure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8564--8574}
}
StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios: Yifei Wang,

Zhenkai Li,

Tianwen Qian,

Huanran Zheng,

Zheng Wang,

Yuqian Fu,

Xiaoling Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifei and Li, Zhenkai and Qian, Tianwen and Zheng, Huanran and Wang, Zheng and Fu, Yuqian and Wang, Xiaoling},
  title     = {{StreamEQA}: Towards Streaming Video Understanding for Embodied Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9422--9432}
}
HEDA: Hyperbolic-Euclidean Dual Adaptation for Robust Real-World Point Cloud Completion: Aihua Mao,

Jun Yang,

Yong-Jin Liu,

Ying He; [pdf]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Aihua and Yang, Jun and Liu, Yong-Jin and He, Ying},
  title     = {{HEDA}: Hyperbolic-{Euclidean} Dual Adaptation for Robust Real-World Point Cloud Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {149--159}
}
OffNadirLoc: Benchmark and Framework for Challenging UAV-to-Satellite Geo-Localization under Large Off-Nadir Views: Qian Qiao,

Wenye Liu,

Ting Liu,

Jiuhe Shu,

Peng Wang; [pdf]
[bibtex]
@InProceedings{Qiao_2026_CVPR,
  author    = {Qiao, Qian and Liu, Wenye and Liu, Ting and Shu, Jiuhe and Wang, Peng},
  title     = {{OffNadirLoc}: Benchmark and Framework for Challenging {UAV}-to-Satellite Geo-Localization under Large Off-Nadir Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6394--6403}
}
PLCReg: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration: Yedi Zhang,

Wenhui Huang,

Yuanjie Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yedi and Huang, Wenhui and Zheng, Yuanjie},
  title     = {{PLCReg}: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5535--5544}
}
PhySe-RPO: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal: Zining Fang,

Cheng Xue,

Chunhui Liu,

Bin Xu,

Ming Chen,

Xiaowei Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zining and Xue, Cheng and Liu, Chunhui and Xu, Bin and Chen, Ming and Hu, Xiaowei},
  title     = {{PhySe-RPO}: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5347--5356}
}
DEGround: An Effective Baseline for Ego-centric 3D Visual Grounding With a Homogeneous Framework: Yani Zhang,

Dongming Wu,

Hao Shi,

Yingfei Liu,

Tiancai Wang,

Xingping Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yani and Wu, Dongming and Shi, Hao and Liu, Yingfei and Wang, Tiancai and Dong, Xingping},
  title     = {{DEGround}: An Effective Baseline for Ego-centric {3D} Visual Grounding With a Homogeneous Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3103--3113}
}
Do Audio-Visual Large Language Models Really See and Hear?: Ramaneswaran Selvakumar,

Kaousheik Jayakumar,

S Sakshi,

Sreyan Ghosh,

Ruohan Gao,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Selvakumar_2026_CVPR,
  author    = {Selvakumar, Ramaneswaran and Jayakumar, Kaousheik and Sakshi, S and Ghosh, Sreyan and Gao, Ruohan and Manocha, Dinesh},
  title     = {Do Audio-Visual Large Language Models Really See and Hear?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5892--5902}
}
Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models: Amir El-Ghoussani,

Marc Hölle,

Gustavo Carneiro,

Vasileios Belagiannis; [pdf] [supp]
[bibtex]
@InProceedings{El-Ghoussani_2026_CVPR,
  author    = {El-Ghoussani, Amir and H{\"o}lle, Marc and Carneiro, Gustavo and Belagiannis, Vasileios},
  title     = {Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4810--4820}
}
Beyond Pixel Loss: Video-INRs Prefer Perceptual Optimization: Junqi Shi,

Wuyang Cong,

Ming Lu,

Bowei Xu,

Zhan Ma; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Junqi and Cong, Wuyang and Lu, Ming and Xu, Bowei and Ma, Zhan},
  title     = {Beyond Pixel Loss: {Video-INRs} Prefer Perceptual Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4843--4854}
}
KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification: Christine Dewi,

Dhananjay R Thiruvady,

Nayyar Zaidi; [pdf] [supp]
[bibtex]
@InProceedings{Dewi_2026_CVPR,
  author    = {Dewi, Christine and Thiruvady, Dhananjay R and Zaidi, Nayyar},
  title     = {{KGGAT}: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8766--8775}
}
When Agents Steer Human Perception: How AI-Selected Images Can Convertly Alter Disagreements: Chi Zhang,

Yulang Gao,

Jiachen Zou,

Chen Wei,

Quanying Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Gao, Yulang and Zou, Jiachen and Wei, Chen and Liu, Quanying},
  title     = {When Agents Steer Human Perception: How {AI}-Selected Images Can Convertly Alter Disagreements},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8111--8120}
}
Shape and Texture Recognition in Large Vision-Language Models: Sagi Eppel,

Mor Bismut,

Alona Strugatski; [pdf] [supp]
[bibtex]
@InProceedings{Eppel_2026_CVPR,
  author    = {Eppel, Sagi and Bismut, Mor and Strugatski, Alona},
  title     = {Shape and Texture Recognition in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1839--1849}
}
StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes: Souheib Ben Mabrouk,

Jean-Emmanuel Deschaud,

Eva Coupeté,

Thomas Derbanne,

Nicolas Rahmouni; [pdf] [supp]
[bibtex]
@InProceedings{Ben_Mabrouk_2026_CVPR,
  author    = {Ben Mabrouk, Souheib and Deschaud, Jean-Emmanuel and Coupet{\'e}, Eva and Derbanne, Thomas and Rahmouni, Nicolas},
  title     = {{StabiGS}: Video Stabilization through Rendering-Aware Trajectory Optimization in {3DGS}-Reconstructed Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491}
}
Depth Adaptive Efficient Visual Autoregressive Modeling: Chunliang Li,

Tianze Cao,

Sanyuan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunliang and Cao, Tianze and Zhao, Sanyuan},
  title     = {Depth Adaptive Efficient Visual Autoregressive Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4213--4223}
}
TPTransformer: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution: Honghui Xu,

Chuangjie Fang,

Yiqun Meng,

Jiawei Jiang,

Sixian Chan,

Shiqing Zhang,

Jianwei Zheng; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Honghui and Fang, Chuangjie and Meng, Yiqun and Jiang, Jiawei and Chan, Sixian and Zhang, Shiqing and Zheng, Jianwei},
  title     = {{TPTransformer}: Tensor-Tensor Product Transformer for Hyperspectral Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1670--1679}
}
Mitigating Vision-Text Order Bias in Vision-Language Model: Weilin Gan,

Yifan Song,

Zhuocheng Yu,

Sujian Li; [pdf]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Weilin and Song, Yifan and Yu, Zhuocheng and Li, Sujian},
  title     = {Mitigating Vision-Text Order Bias in Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9664--9673}
}
An Interpretable Alzheimer's Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning: Pengzhou Chen,

Qiling Tang,

XinYu Chai,

Rong Liu,

Zhi Li,

Liman Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengzhou and Tang, Qiling and Chai, XinYu and Liu, Rong and Li, Zhi and Liu, Liman},
  title     = {An Interpretable {Alzheimer's} Disease Diagnosis Model via Gray Matter Attention Guided Counterfactual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3241--3251}
}
M-DocSum: Do LVLMs Genuinely Comprehend Interleaved Image-Text in Document Summarization?: Haolong Yan,

Kaijun Tan,

Yeqing Shen,

Xin Huang,

Jia Wang,

Zheng Ge,

Xiangyu Zhang,

Si Li,

Daxin Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haolong and Tan, Kaijun and Shen, Yeqing and Huang, Xin and Wang, Jia and Ge, Zheng and Zhang, Xiangyu and Li, Si and Jiang, Daxin},
  title     = {{M-DocSum}: Do {LVLMs} Genuinely Comprehend Interleaved Image-Text in Document Summarization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2731--2741}
}
Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness: Yuyang Chen,

Linqian Zeng,

Yijin Zhou,

Hengjie Li,

Jidong Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuyang and Zeng, Linqian and Zhou, Yijin and Li, Hengjie and Zhai, Jidong},
  title     = {Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4486--4494}
}
Real-IAD MVN: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection: Wenbing Zhu,

Jianing Liang,

Linjie Cheng,

Yurui Pan,

Zhuhao Chen,

Qingwang Yan,

Yudong Cheng,

Jianghui Zhang,

Mingmin Chi,

Bo Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Wenbing and Liang, Jianing and Cheng, Linjie and Pan, Yurui and Chen, Zhuhao and Yan, Qingwang and Cheng, Yudong and Zhang, Jianghui and Chi, Mingmin and Peng, Bo},
  title     = {{Real-IAD} {MVN}: A Multi-View Normal Vector Dataset and Benchmark for High-Fidelity Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2060--2068}
}
AOMGen: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation: Yulu Wu,

Jiujun Cheng,

Haowen Wang,

Dengyang Suo,

Pei Ren,

Qichao Mao,

Shangce Gao,

Yakun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yulu and Cheng, Jiujun and Wang, Haowen and Suo, Dengyang and Ren, Pei and Mao, Qichao and Gao, Shangce and Huang, Yakun},
  title     = {{AOMGen}: Photoreal, Physics-Consistent Demonstration Generation for Articulated Object Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3082--3091}
}
PAVE: An End-to-End Dataset for Production Autonomous Vehicle Evaluation: Xiangyu Li,

Chen Wang,

Yumao Liu,

Dengbo He,

Jiahao Zhang,

Ke Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiangyu and Wang, Chen and Liu, Yumao and He, Dengbo and Zhang, Jiahao and Ma, Ke},
  title     = {{PAVE}: An End-to-End Dataset for Production Autonomous Vehicle Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1010--1018}
}
Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework: Yong Li,

Weiyu Zhang,

Ling Dai,

Jian Yang,

Dacheng Yin,

Sirun Li,

Jing Lyu,

Fengyun Rao,

Fan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yong and Zhang, Weiyu and Dai, Ling and Yang, Jian and Yin, Dacheng and Li, Sirun and Lyu, Jing and Rao, Fengyun and Zhang, Fan},
  title     = {Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6302--6311}
}
OpenTrack3D: Towards Accurate and Generalizable Open-Vocabulary 3D Instance Segmentation: Zhishan Zhou,

Siyuan Wei,

Zengran Wang,

Chunjie Wang,

Xiaosheng Yan,

Xiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Zhishan and Wei, Siyuan and Wang, Zengran and Wang, Chunjie and Yan, Xiaosheng and Liu, Xiao},
    title     = {{OpenTrack3D}: Towards Accurate and Generalizable Open-Vocabulary {3D} Instance Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {233--242}
}
Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning: Aoqiang Zhu,

Min Hu,

Yan Xing,

Yiming Tang; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Aoqiang and Hu, Min and Xing, Yan and Tang, Yiming},
    title     = {Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6921--6931}
}
Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing: Xiangyue Li,

Xiaoyang Wang,

Siyue Yao,

Mingjie Sun,

Yupei Wu; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xiangyue and Wang, Xiaoyang and Yao, Siyue and Sun, Mingjie and Wu, Yupei},
    title     = {Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4345--4354}
}
ReConText3D: Replay-based Continual Text-to-3D Generation: Muhammad Ahmed Ullah Khan,

Muhammad Haris Bin Amir,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2026_CVPR,
    author    = {Khan, Muhammad Ahmed Ullah and Bin Amir, Muhammad Haris and Stricker, Didier and Afzal, Muhammad Zeshan},
    title     = {{ReConText3D}: Replay-based Continual Text-to-{3D} Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7893--7902}
}
RedVTP: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning: Jingqi Xu,

Jingxi Lu,

Chenghao Li,

Sreetama Sarkar,

Souvik Kundu,

Peter A Beerel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Jingqi and Lu, Jingxi and Li, Chenghao and Sarkar, Sreetama and Kundu, Souvik and Beerel, Peter A.},
    title     = {{RedVTP}: Training-Free Acceleration of Diffusion Vision-Language Models Inference via Masked Token-Guided Visual Token Pruning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2783--2792}
}
Tiny Inference-Time Scaling with Latent Verifiers: Davide Bucciarelli,

Evelyn Turri,

Lorenzo Baraldi,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bucciarelli_2026_CVPR,
    author    = {Bucciarelli, Davide and Turri, Evelyn and Baraldi, Lorenzo and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
    title     = {Tiny Inference-Time Scaling with Latent Verifiers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2873--2882}
}
TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation: Shuo Jin,

Siyue Yu,

Bingfeng Zhang,

Chao Yao,

Meiqin Liu,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Yao, Chao and Liu, Meiqin and Xiao, Jimin},
    title     = {{TALENT}: Target-Aware Efficient Tuning for Referring Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7472--7482}
}
ControlPose: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement: Zhongjing Du,

Xiao Chen,

Zhiwei Nie,

Yuxuan Chen,

Chang Liu,

Xiangyang Ji,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
    author    = {Du, Zhongjing and Chen, Xiao and Nie, Zhiwei and Chen, Yuxuan and Liu, Chang and Ji, Xiangyang and Chen, Jie},
    title     = {{ControlPose}: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3851--3860}
}
Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models: Junlong Ke,

Zichen Wen,

Boxue Yang,

Yantai Yang,

Xuyang Liu,

Chenfei Liao,

Zhaorun Chen,

Shaobo Wang,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR,
    author    = {Ke, Junlong and Wen, Zichen and Yang, Boxue and Yang, Yantai and Liu, Xuyang and Liao, Chenfei and Chen, Zhaorun and Wang, Shaobo and Zhang, Linfeng},
    title     = {{Flash-Unified}: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9131--9142}
}
P^2CS: Parallel Point Cloud Pre-Training with Semantic Consistency: Linshuang Diao,

Sensen Song,

Yuan Jia,

Yurong Qian,

Dayong Ren; [pdf]
[bibtex]
@InProceedings{Diao_2026_CVPR,
    author    = {Diao, Linshuang and Song, Sensen and Jia, Yuan and Qian, Yurong and Ren, Dayong},
    title     = {{P$^2$CS}: Parallel Point Cloud Pre-Training with Semantic Consistency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5117--5126}
}
PHYLOMAN: Generative Behavior Control via Fusing LLM Planning and Physics-based Control: Jusheng Zhang,

Jinzhou Tang,

Sidi Liu,

Jian Wang,

Keze Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jusheng and Tang, Jinzhou and Liu, Sidi and Wang, Jian and Wang, Keze},
    title     = {{PHYLOMAN}: Generative Behavior Control via Fusing {LLM} Planning and Physics-based Control},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3585--3597}
}
Super Sparse DETR: YOLO-Competitive Convergence and Acceleration: Hebao Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Hebao},
    title     = {Super Sparse {DETR}: {YOLO}-Competitive Convergence and Acceleration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6677--6684}
}
THEval. Evaluation Framework for Talking Head Video Generation: Nabyl Quignon,

Baptiste Chopin,

Yaohui Wang,

Antitza Dantcheva; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quignon_2026_CVPR,
    author    = {Quignon, Nabyl and Chopin, Baptiste and Wang, Yaohui and Dantcheva, Antitza},
    title     = {{THEval}. {Evaluation} Framework for Talking Head Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1943--1953}
}
Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning: Jungwon Choi,

Eunwoo Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Jungwon and Kim, Eunwoo},
    title     = {Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9368--9377}
}
HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination: Hui Yu,

Xiao Ke,

Zhihong Zeng,

Huangbiao Xu,

Huanqi Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Hui and Ke, Xiao and Zeng, Zhihong and Xu, Huangbiao and Wu, Huanqi},
    title     = {{HARP}: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8337--8346}
}
OmniHead: A Unified Model for Dynamic Nonverbal Facial Behaviors: Pierre Vuillecard,

Jean-Marc Odobez; [pdf] [supp]
[bibtex]
@InProceedings{Vuillecard_2026_CVPR,
    author    = {Vuillecard, Pierre and Odobez, Jean-Marc},
    title     = {{OmniHead}: A Unified Model for Dynamic Nonverbal Facial Behaviors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3553--3564}
}
Online Interpretable Matrix Decomposition for Large-Scale Streaming Data: Muhammad A. A. Abdelgawad,

Abdelrahman B. M. Eldaly,

Meng Xinmin,

Peng Jing,

Abdurrashid Ibrahim Sanka,

Ray C.C. Cheung,

Hong Yan; [pdf] [supp]
[bibtex]
@InProceedings{Abdelgawad_2026_CVPR,
    author    = {Abdelgawad, Muhammad A. A. and Eldaly, Abdelrahman B. M. and Xinmin, Meng and Jing, Peng and Sanka, Abdurrashid Ibrahim and Cheung, Ray C. C. and Yan, Hong},
    title     = {Online Interpretable Matrix Decomposition for Large-Scale Streaming Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7030--7039}
}
ROSE: Retrieval-Oriented Segmentation Enhancement: Song Tang,

Guangquan Jie,

Henghui Ding,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Song and Jie, Guangquan and Ding, Henghui and Jiang, Yu-Gang},
    title     = {{ROSE}: Retrieval-Oriented Segmentation Enhancement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7398--7407}
}
How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices: Xiang Yin,

Jinfan Hu,

Zhiyuan You,

Kainan Yan,

Yu Tang,

Chao Dong,

Jinjin Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Xiang and Hu, Jinfan and You, Zhiyuan and Yan, Kainan and Tang, Yu and Dong, Chao and Gu, Jinjin},
    title     = {How far have we gone in Generative Image Restoration? {A} study on its capability, limitations and evaluation practices},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4909--4919}
}
VoxFace: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation: Junwen Xiong,

Chuanyue Li,

Peng Zhang; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
    author    = {Xiong, Junwen and Li, Chuanyue and Zhang, Peng},
    title     = {{VoxFace}: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3543--3552}
}
Jailbreaking Frontier Foundation Models Through Intention Deception: Xinhe Wang,

Katia Sycara,

Yaqi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xinhe and Sycara, Katia and Xie, Yaqi},
    title     = {Jailbreaking Frontier Foundation Models Through Intention Deception},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {666--674}
}
PTF-CT: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View CT: Borui Kang,

Guanyi Qin,

Chuanpu Li,

Yueming Jin; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2026_CVPR,
    author    = {Kang, Borui and Qin, Guanyi and Li, Chuanpu and Jin, Yueming},
    title     = {{PTF-CT}: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View {CT}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5214--5223}
}
Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For 3D Meshes: Weihang Ran,

Qingtian Zhu,

Mingdeng Cao,

Wei Yuan,

Isao Echizen,

Yinqiang Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Ran_2026_CVPR,
    author    = {Ran, Weihang and Zhu, Qingtian and Cao, Mingdeng and Yuan, Wei and Echizen, Isao and Zheng, Yinqiang},
    title     = {Machine Vision-Oriented Appearance Design: Generate Natural And Robust Textures For {3D} Meshes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1597--1607}
}
Rethinking Compact (<1M) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions: Christos Kyrkou; [pdf] [supp]
[bibtex]
@InProceedings{Kyrkou_2026_CVPR,
    author    = {Kyrkou, Christos},
    title     = {Rethinking Compact ({$<$1M}) Vision Models: Balancing Accuracy and Speed through Multi-Path Atrous Convolutions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2710--2720}
}
MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness: JaeHyuck Choi,

Minjun Kim,

Je Hyeong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, JaeHyuck and Kim, Minjun and Hong, Je Hyeong},
    title     = {{MAGIC}: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8524--8533}
}
Two-Stage 3D Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation: Jie Zhang,

Yu Xin,

Guoqing Li; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jie and Xin, Yu and Li, Guoqing},
    title     = {Two-Stage {3D} Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5245--5254}
}
Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations: Souptik Sen,

Raneen Younis,

Zahra Ahmadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sen_2026_CVPR,
    author    = {Sen, Souptik and Younis, Raneen and Ahmadi, Zahra},
    title     = {Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6080--6089}
}
GRAFT: Graph-Based Affordance Transfer via Part Correspondence: Mengying Lin,

Utkarsh Mishra,

Ajay Mandlekar,

Danfei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Mengying and Mishra, Utkarsh and Mandlekar, Ajay and Xu, Danfei},
    title     = {{GRAFT}: Graph-Based Affordance Transfer via Part Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8746--8755}
}
Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark: Rajmund Nagy,

Hendric Voss,

Thanh Hoang-Minh,

Mihail Tsakov,

Teodor Nikolov,

Zeyi Zhang,

Tenglong Ao,

Sicheng Yang,

Shaoli Huang,

Yongkang Cheng,

M. Hamza Mughal,

Rishabh Dabral,

Kiran Chhatre,

Christian Theobalt,

Libin Liu,

Stefan Kopp,

Rachel McDonnell,

Michael Neff,

Taras Kucherenko,

Youngwoo Yoon,

Gustav Eje Henter; [pdf] [supp]
[bibtex]
@InProceedings{Nagy_2026_CVPR,
    author    = {Nagy, Rajmund and Voss, Hendric and Hoang-Minh, Thanh and Tsakov, Mihail and Nikolov, Teodor and Zhang, Zeyi and Ao, Tenglong and Yang, Sicheng and Huang, Shaoli and Cheng, Yongkang and Mughal, M. Hamza and Dabral, Rishabh and Chhatre, Kiran and Theobalt, Christian and Liu, Libin and Kopp, Stefan and McDonnell, Rachel and Neff, Michael and Kucherenko, Taras and Yoon, Youngwoo and Henter, Gustav Eje},
    title     = {Towards Reliable Human Evaluations in Gesture Generation: Insights from a Community-Driven State-of-the-Art Benchmark},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2152--2164}
}
HM-Talker: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis: Shiyu Liu,

Kui Jiang,

Junjun Jiang,

Xianming Liu,

Xiaocheng Feng,

Fei Ma,

Hongxun Yao,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Shiyu and Jiang, Kui and Jiang, Junjun and Liu, Xianming and Feng, Xiaocheng and Ma, Fei and Yao, Hongxun and Tian, Qi},
    title     = {{HM-Talker}: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3729--3738}
}
M^3D-BFS: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis: Rui Dong,

Xiaotong Zhang,

Jiaxing Li,

Yueying Li,

Jiayin Wei,

Youyong Kong; [pdf]
[bibtex]
@InProceedings{Dong_2026_CVPR,
    author    = {Dong, Rui and Zhang, Xiaotong and Li, Jiaxing and Li, Yueying and Wei, Jiayin and Kong, Youyong},
    title     = {{M$^3$D-BFS}: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5419--5429}
}
Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation: Hongbo Zheng,

Afshin Bozorgpour,

Dorit Merhof,

Minjia Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Hongbo and Bozorgpour, Afshin and Merhof, Dorit and Zhang, Minjia},
    title     = {Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5579--5588}
}
POMA-3D: The Point Map Way to 3D Scene Understanding: Ye Mao,

Weixun Luo,

Ranran Huang,

Junpeng Jing,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Ye and Luo, Weixun and Huang, Ranran and Jing, Junpeng and Mikolajczyk, Krystian},
    title     = {{POMA-3D}: The Point Map Way to {3D} Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7282--7292}
}
HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding: Yueqian Lin,

Jingyang Zhang,

Qinsi Wang,

Hancheng Ye,

Yuzhe Fu,

Yudong Liu,

Hai Helen Li,

Yiran Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Yueqian and Zhang, Jingyang and Wang, Qinsi and Ye, Hancheng and Fu, Yuzhe and Liu, Yudong and Li, Hai Helen and Chen, Yiran},
    title     = {{HippoMM}: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5968--5977}
}
BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding: Ziyi Zhao,

Jinzhao Zhou,

Xiaowei Jiang,

Beining Cao,

Wenhao Ma,

Yang Shen,

Ren Li,

Yu-Kai Wang,

Chin-teng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Ziyi and Zhou, Jinzhao and Jiang, Xiaowei and Cao, Beining and Ma, Wenhao and Shen, Yang and Li, Ren and Wang, Yu-Kai and Lin, Chin-teng},
    title     = {{BrainStack}: {Neuro-MoE} with Functionally Guided Expert Routing for {EEG}-Based Language Decoding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7050--7059}
}
FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers: Xinwan Wen,

Bowen Li,

Jiajun Luo,

Ye Li,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
    author    = {Wen, Xinwan and Li, Bowen and Luo, Jiajun and Li, Ye and Wang, Zhi},
    title     = {{FREE}: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4097--4107}
}
Controllable Radar Simulation with Waveform Parameter Embedding: Weiqing Xiao,

Hao Huang,

Chonghao Zhong,

Yujie Lin,

Nan Wang,

Xiaoxue Chen,

Zhaoxi Chen,

Saining Zhang,

Shuocheng Yang,

Pierre Merriaux,

Lei Lei,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
    author    = {Xiao, Weiqing and Huang, Hao and Zhong, Chonghao and Lin, Yujie and Wang, Nan and Chen, Xiaoxue and Chen, Zhaoxi and Zhang, Saining and Yang, Shuocheng and Merriaux, Pierre and Lei, Lei and Zhao, Hao},
    title     = {Controllable Radar Simulation with Waveform Parameter Embedding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6424--6434}
}
Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization: Chao Sun,

Junbo Zhang,

Chuanbo Zhu,

Mingjun Huang,

Bo Du; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Chao and Zhang, Junbo and Zhu, Chuanbo and Huang, Mingjun and Du, Bo},
    title     = {Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7272--7281}
}
Frequency-Modulated Visual Restoration for Matryoshka Large Multimodal Models: Qingtao Pan,

Zhihao Dou,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Qingtao and Dou, Zhihao and Li, Shuo},
    title     = {Frequency-Modulated Visual Restoration for {Matryoshka} Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9726--9737}
}
Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision: Junjie Chen,

Zezheng Liu,

Runxiang Liu,

Yuming Fang,

Yifan Zuo,

Jiebin Yan; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Junjie and Liu, Zezheng and Liu, Runxiang and Fang, Yuming and Zuo, Yifan and Yan, Jiebin},
    title     = {Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7872--7882}
}
DiFlowDubber: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization: Ngoc-Son Nguyen,

Thanh V. T. Tran,

Jeongsoo Choi,

Hieu-Nghia Huynh-Nguyen,

Truong-Son Hy,

Van Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Ngoc-Son and Tran, Thanh V. T. and Choi, Jeongsoo and Huynh-Nguyen, Hieu-Nghia and Hy, Truong-Son and Nguyen, Van},
    title     = {{DiFlowDubber}: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5838--5848}
}
Bridge Your Fields: MeteoNet for Efficient Non-Uniform Meteorological Field Reconstruction: Xuanming Jiang,

Baoyi An,

Dingyu Nie,

Haoyu Ren,

Zhengwei Zou,

Yizhe Yang,

Jialie Shen,

Zhiwen Jin,

Xueming Qian,

Zhongyu Yang,

Guoshuai Zhao; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Xuanming and An, Baoyi and Nie, Dingyu and Ren, Haoyu and Zou, Zhengwei and Yang, Yizhe and Shen, Jialie and Jin, Zhiwen and Qian, Xueming and Yang, Zhongyu and Zhao, Guoshuai},
    title     = {Bridge Your Fields: {MeteoNet} for Efficient Non-Uniform Meteorological Field Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1608--1617}
}
SuperGlasses: Benchmarking Vision Language Models as Intelligent Agents for AI Smart Glasses: Zhuohang Jiang,

Xu Yuan,

Haohao Qu,

Shanru Lin,

Kanglong Liu,

Wenqi Fan,

Li Qing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Zhuohang and Yuan, Xu and Qu, Haohao and Lin, Shanru and Liu, Kanglong and Fan, Wenqi and Qing, Li},
    title     = {{SuperGlasses}: Benchmarking Vision Language Models as Intelligent Agents for {AI} Smart Glasses},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2165--2175}
}
MipKV: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling: Junming Zhang,

Yifei Ji,

Yongxuan Han,

Zhenzhe Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Junming and Ji, Yifei and Han, Yongxuan and Zheng, Zhenzhe},
    title     = {{MipKV}: A Sparsify-then-Recover Paradigm for Accelerating Large Vision-Language Model Pre-Filling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2926--2936}
}
PrismNet: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating: Ruichen Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Ruichen},
    title     = {{PrismNet}: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4866--4876}
}
Catalyst: Out-of-Distribution Detection via Elastic Scaling: Abid Hassan,

Tuan Ngo,

Saad Shafiq,

Nenad Medvidovic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassan_2026_CVPR,
    author    = {Hassan, Abid and Ngo, Tuan and Shafiq, Saad and Medvidovic, Nenad},
    title     = {Catalyst: Out-of-Distribution Detection via Elastic Scaling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1618--1628}
}
ARGS: Auto-Regressive Gaussian Splatting via Parallel Progressive Next-Scale Prediction: Quanyuan Ruan,

Kewei Shi,

Jiabao Lei,

Xifeng Gao,

Xiaoguang Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
    author    = {Ruan, Quanyuan and Shi, Kewei and Lei, Jiabao and Gao, Xifeng and Han, Xiaoguang},
    title     = {{ARGS}: Auto-Regressive {Gaussian} Splatting via Parallel Progressive Next-Scale Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8439--8448}
}
ReCliFF: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical MLLMs: Yuncheng Jiang,

Chun-Mei Feng,

Rui Sun,

Le Zhang; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Yuncheng and Feng, Chun-Mei and Sun, Rui and Zhang, Le},
    title     = {{ReCliFF}: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5275--5284}
}
Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms: Purvish Jajal,

Nicholas John Eliopoulos,

Benjamin Shiue-Hal Chou,

George K Thiruvathukal,

James C. Davis,

Yung-Hsiang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jajal_2026_CVPR,
    author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K. and Davis, James C. and Lu, Yung-Hsiang},
    title     = {Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4118--4128}
}
VSI: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding: Jianxiang He,

Meisheng Hong,

Jungang Li,

Weiyu Guo,

Xuming Hu,

Hui Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Jianxiang and Hong, Meisheng and Li, Jungang and Guo, Weiyu and Hu, Xuming and Xiong, Hui},
    title     = {{VSI}: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9003--9012}
}
MART: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data: Changhui Hu,

Bhalaji Nagarajan,

Ricardo Marques,

Petia Radeva; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Changhui and Nagarajan, Bhalaji and Marques, Ricardo and Radeva, Petia},
    title     = {{MART}: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7018--7029}
}
How2Sign-Synth3D: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking: Levente Tempfli,

Stephan Huber,

Oscar Koller,

Amanda Duarte; [pdf]
[bibtex]
@InProceedings{Tempfli_2026_CVPR,
    author    = {Tempfli, Levente and Huber, Stephan and Koller, Oscar and Duarte, Amanda},
    title     = {{How2Sign-Synth3D}: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3501--3509}
}
Multimodal Large Language Models as Image Classifiers: Nikita Kisel,

Illia Volkov,

Klara Janouskova,

Jiri Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kisel_2026_CVPR,
    author    = {Kisel, Nikita and Volkov, Illia and Janouskova, Klara and Matas, Jiri},
    title     = {Multimodal Large Language Models as Image Classifiers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1711--1720}
}
Reasoning for Mobile User Experience with Multimodal LLMs: Task, Benchmark, and Approach: Ruichao Mao,

Zhou Fang,

Teng Guo,

Hao Yang,

Yaping Li,

Shaohua Peng,

Maji Huang,

Xiaoyu Lin,

Shuoyang Liu,

Xuepeng Li,

Yuyu Zhang,

Hai Rao; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Ruichao and Fang, Zhou and Guo, Teng and Yang, Hao and Li, Yaping and Peng, Shaohua and Huang, Maji and Lin, Xiaoyu and Liu, Shuoyang and Li, Xuepeng and Zhang, Yuyu and Rao, Hai},
    title     = {Reasoning for Mobile User Experience with Multimodal {LLMs}: Task, Benchmark, and Approach},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8983--8992}
}
PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks: Jie Guo,

JunXiang Wu,

Nan An,

Zhen Zhang,

Shuiying Xiang,

Mingjin Zhang,

Yunsong Li,

Yu'e Gao; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Jie and Wu, JunXiang and An, Nan and Zhang, Zhen and Xiang, Shuiying and Zhang, Mingjin and Li, Yunsong and Gao, Yu'e}, title = {PSLIF: A Primary-Supplementary LIF Neuron for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2367-2376} }
Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment: Theodor Wulff,

Federico Tavella,

Rahul Singh Maharjan,

Manith Adikari,

Angelo Cangelosi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wulff_2026_CVPR, author = {Wulff, Theodor and Tavella, Federico and Maharjan, Rahul Singh and Adikari, Manith and Cangelosi, Angelo}, title = {Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9269-9281} }
Learning Predictive Visuomotor Coordination: Wenqi Jia,

Bolin Lai,

Xu Cao,

Miao Liu,

Danfei Xu,

James M. Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Wenqi and Lai, Bolin and Cao, Xu and Liu, Miao and Xu, Danfei and Rehg, James M.}, title = {Learning Predictive Visuomotor Coordination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3609-3619} }
AdaMeta: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning: Xingyu Yang,

Yidan Ma,

Hanzhang Qu,

Jianfu Cao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Xingyu and Ma, Yidan and Qu, Hanzhang and Cao, Jianfu}, title = {AdaMeta: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7552-7561} }
Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification: Yichun Hu,

Zixuan Hu,

Ling-Yu Duan; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Yichun and Hu, Zixuan and Duan, Ling-Yu}, title = {Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6849-6858} }
Advancing Open-Set Detection and Segmentation via Disentangled Representations: Haokang Zhang,

Yuchen Guan,

Runxi Cheng,

Yujiu Yang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haokang and Guan, Yuchen and Cheng, Runxi and Yang, Yujiu}, title = {Advancing Open-Set Detection and Segmentation via Disentangled Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6622-6632} }
SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes: Fudong Ge,

Dingning Liu,

Hanshi Wang,

Yiwei Zhang,

Jin Gao,

Weiming Hu,

Zhipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ge_2026_CVPR, author = {Ge, Fudong and Liu, Dingning and Wang, Hanshi and Zhang, Yiwei and Gao, Jin and Hu, Weiming and Zhang, Zhipeng}, title = {SurfaceGS: Dynamic Surface Gaussian Splatting for Urban Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {900-909} }
Generative Visual Chain-of-Thought for Image Editing: Zijin Yin,

Tiankai Hang,

Yiji Cheng,

Shiyi Zhang,

Runze He,

Yu Xu,

Chunyu Wang,

Bing Li,

Zheng Chang,

Kongming Liang,

Qinglin Lu,

Zhanyu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Zijin and Hang, Tiankai and Cheng, Yiji and Zhang, Shiyi and He, Runze and Xu, Yu and Wang, Chunyu and Li, Bing and Chang, Zheng and Liang, Kongming and Lu, Qinglin and Ma, Zhanyu}, title = {Generative Visual Chain-of-Thought for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4657-4667} }
CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery: Tung-I Chen,

Lingdong Wang,

Subhransu Maji,

Ramesh K. Sitaraman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Tung-I and Wang, Lingdong and Maji, Subhransu and Sitaraman, Ramesh K.}, title = {CATRF: Codec-Adaptive TriPlane Radiance Fields for Volumetric Content Delivery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {457-467} }
Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data: Ji Woong Kim,

Ke Wang,

Zipeng Fu,

Sirui Chen,

Cong Zhao,

Jeff Lai,

Chelsea Finn; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Ji Woong and Wang, Ke and Fu, Zipeng and Chen, Sirui and Zhao, Cong and Lai, Jeff and Finn, Chelsea}, title = {Ego-Pi: VLA Fine-Tuning for Ego-Centric Human and Robot Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1515-1524} }
SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters: Shohei Tanaka,

Atsushi Hashimoto,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanaka_2026_CVPR, author = {Tanaka, Shohei and Hashimoto, Atsushi and Ushiku, Yoshitaka}, title = {SciPostLayoutTree: A Dataset for Structural Analysis of Scientific Posters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2753-2762} }
Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the MME-CoF Benchmark: Ziyu Guo,

Xinyan Chen,

Renrui Zhang,

Ruichuan An,

Yu Qi,

Dongzhi Jiang,

Xiangtai Li,

Manyuan Zhang,

Hongsheng Li,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Ziyu and Chen, Xinyan and Zhang, Renrui and An, Ruichuan and Qi, Yu and Jiang, Dongzhi and Li, Xiangtai and Zhang, Manyuan and Li, Hongsheng and Heng, Pheng-Ann}, title = {Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the MME-CoF Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9175-9184} }
Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion: Feiyang Xie,

Haoqi Yuan,

Zongqing Lu; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Feiyang and Yuan, Haoqi and Lu, Zongqing}, title = {Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3384-3393} }
Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation: Ziwei Zhang,

Dayu Tan,

Xin Peng,

Weimin Zhong; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ziwei and Tan, Dayu and Peng, Xin and Zhong, Weimin}, title = {Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5158-5167} }
Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs: Mona Gandhi,

K.J. Joseph,

Srinivasan Parthasarathy,

Sayan Nag; [pdf] [supp]
[bibtex]
@InProceedings{Gandhi_2026_CVPR, author = {Gandhi, Mona and Joseph, K. J. and Parthasarathy, Srinivasan and Nag, Sayan}, title = {Through the PRISM: Principle-Aware, Interpretable, and Multi-Scale Evaluation of Visual Designs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1786-1796} }
Learning a Particle Dynamics Model with Real-World Videos: Chanho Kim,

Suhas V. Sumukh,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Chanho and Sumukh, Suhas V. and Fuxin, Li}, title = {Learning a Particle Dynamics Model with Real-World Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {498-507} }
Guided Lensless Polarization Imaging: Noa Kraicer,

Erez Yosef,

Raja Giryes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kraicer_2026_CVPR, author = {Kraicer, Noa and Yosef, Erez and Giryes, Raja}, title = {Guided Lensless Polarization Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1252-1262} }
FVLF: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation: Zhi Rao,

Yucheng Zhou,

Benjia Zhou,

Yiqing Huang,

Sergio Escalera,

Jun Wan; [pdf]
[bibtex]
@InProceedings{Rao_2026_CVPR, author = {Rao, Zhi and Zhou, Yucheng and Zhou, Benjia and Huang, Yiqing and Escalera, Sergio and Wan, Jun}, title = {FVLF: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9237-9247} }
DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks: Jacob Piland,

Christopher Sweet,

Adam Czajka; [pdf] [arXiv]
[bibtex]
@InProceedings{Piland_2026_CVPR, author = {Piland, Jacob and Sweet, Christopher and Czajka, Adam}, title = {DiffGradCAM: A Class Activation Map Using the Full Model Decision to Solve Unaddressed Adversarial Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1201-1210} }
dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning: Yingzi Ma,

Yulong Cao,

Wenhao Ding,

Shuibai Zhang,

Yan Wang,

Boris Ivanovic,

Ming Jiang,

Marco Pavone,

Chaowei Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Yingzi and Cao, Yulong and Ding, Wenhao and Zhang, Shuibai and Wang, Yan and Ivanovic, Boris and Jiang, Ming and Pavone, Marco and Xiao, Chaowei}, title = {dVLM-AD: Enhance Diffusion Vision-Language-Model for Driving via Controllable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1050-1061} }
Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment: Sy-Tuyen Ho,

Koh Jun Hao,

Ngoc-Bao Nguyen,

Alexander Binder,

Ngai-Man Cheung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2026_CVPR, author = {Ho, Sy-Tuyen and Hao, Koh Jun and Nguyen, Ngoc-Bao and Binder, Alexander and Cheung, Ngai-Man}, title = {Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8070-8079} }
UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation: Zeyang Liu,

Le Wang,

Sanping Zhou,

Yuxuan Wu,

Xiaolong Sun,

Gang Hua,

Haoxiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Zeyang and Wang, Le and Zhou, Sanping and Wu, Yuxuan and Sun, Xiaolong and Hua, Gang and Li, Haoxiang}, title = {UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4668-4677} }
GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design: Xiaolei Zhou,

Chuangjie Fang,

Jie Wu,

Jingyi Yang,

Boyi Lin,

Jianwei Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xiaolei and Fang, Chuangjie and Wu, Jie and Yang, Jingyi and Lin, Boyi and Zheng, Jianwei}, title = {GeoFusion-CAD: Structure-Aware Diffusion with Geometric State Space for Parametric 3D Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {243-252} }
Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement: Pu Li,

Huafeng Li,

Yafei Zhang,

Yu Liu,

Wen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Pu and Li, Huafeng and Zhang, Yafei and Liu, Yu and Wang, Wen}, title = {Breaking Degradation Coupling: A Structural Entropy-Guided Decoupled Framework and Benchmark for Infrared Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1304-1313} }
Layer Embedding Deep Fusion Graph Neural Network: Taihua Xu,

Genhao Tian,

Jicong Fan,

Xibei Yang,

Qinghua Zhang,

Yun Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Taihua and Tian, Genhao and Fan, Jicong and Yang, Xibei and Zhang, Qinghua and Cui, Yun}, title = {Layer Embedding Deep Fusion Graph Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7091-7100} }
CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models: Fawaz Sammani,

Jonas Fischer,

Nikos Deligiannis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sammani_2026_CVPR, author = {Sammani, Fawaz and Fischer, Jonas and Deligiannis, Nikos}, title = {CLIP-Free, Label Free, Unsupervised Concept Bottleneck Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3262-3272} }
From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images: Fei Yu,

Yu Liu,

Luyang Tang,

Mingchao Sun,

Zengye Ge,

Rui Bu,

Yuchao Jin,

Haisen Zhao,

He Sun,

Yangyan Li,

Mu Xu,

Wenzheng Chen,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Fei and Liu, Yu and Tang, Luyang and Sun, Mingchao and Ge, Zengye and Bu, Rui and Jin, Yuchao and Zhao, Haisen and Sun, He and Li, Yangyan and Xu, Mu and Chen, Wenzheng and Chen, Baoquan}, title = {From Orbit to Ground: Generative City Photogrammetry from Extreme Off-Nadir Satellite Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {391-402} }
Myopia Rectification: KV Cache Pruning for MLLMs Via Dynamic Attention Subsidy and Token Reclamation: Jiedong Zhuang,

Lu Lu,

Ming Dai,

Jian Chen,

Qiang Liu,

Haoji Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Jiedong and Lu, Lu and Dai, Ming and Chen, Jian and Liu, Qiang and Hu, Haoji}, title = {Myopia Rectification: KV Cache Pruning for MLLMs Via Dynamic Attention Subsidy and Token Reclamation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9023-9033} }
From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models: Yukang Feng,

Wenxiao Wu,

Jianwen Sun,

Chuanhao Li,

Fanrui Zhang,

Zizhen Li,

Jiaxin Ai,

Sizhuo Zhou,

Yifan Chang,

Changxin Gao,

Shenglin Zhang,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Yukang and Wu, Wenxiao and Sun, Jianwen and Li, Chuanhao and Zhang, Fanrui and Li, Zizhen and Ai, Jiaxin and Zhou, Sizhuo and Chang, Yifan and Gao, Changxin and Zhang, Shenglin and Zhang, Kaipeng}, title = {From Static Snapshots to Dynamic Trajectories: Evaluating and Enhancing the Learning Pathways of Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2220-2229} }
FraQAT: Quantization Aware Training with Fractional Bits: Luca Morreale,

Alberto Gil C P Ramos,

Malcolm Chadwick,

Mehdi Noroozi,

Ruchika Chavhan,

Abhinav Mehrotra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morreale_2026_CVPR, author = {Morreale, Luca and Gil C P Ramos, Alberto and Chadwick, Malcolm and Noroozi, Mehdi and Chavhan, Ruchika and Mehrotra, Abhinav}, title = {FraQAT: Quantization Aware Training with Fractional Bits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8514-8523} }
M^3A Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering: Jiayi Li,

Yuxuan Hu,

Haoran Geng,

Xiangyu Chen,

Chuhao Zhou,

Ziteng Cui,

Jianfei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Hu, Yuxuan and Geng, Haoran and Chen, Xiangyu and Zhou, Chuhao and Cui, Ziteng and Yang, Jianfei}, title = {{M$^3$A} Policy: Mutable Material Manipulation Augmentation Policy through Photometric Re-rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3070-3081} }
A-SelecT: Automatic Timestep Selection for Diffusion Transformer Representation Learning: Changyu Liu,

James Chenhao Liang,

Wenhao Yang,

Yiming Cui,

Jinghao Yang,

Tianyang Wang,

Qifan Wang,

Dongfang Liu,

Cheng Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Changyu and Liang, James Chenhao and Yang, Wenhao and Cui, Yiming and Yang, Jinghao and Wang, Tianyang and Wang, Qifan and Liu, Dongfang and Han, Cheng}, title = {A-SelecT: Automatic Timestep Selection for Diffusion Transformer Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6943-6954} }
VR-CLIP: Visual Refinement of CLIP for Zero-Shot Semantic Segmentation: Haitao Jiang,

Xu Li,

Yuanyang Cao,

Ying Zhang,

Jianji Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haitao and Li, Xu and Cao, Yuanyang and Zhang, Ying and Wang, Jianji}, title = {VR-CLIP: Visual Refinement of CLIP for Zero-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6809-6818} }
Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization: Zidong Zhao,

Yihao Huang,

Qing Guo,

Tianlin Li,

Anran Li,

Kailong Wang,

Jin Song Dong,

Geguang Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zidong and Huang, Yihao and Guo, Qing and Li, Tianlin and Li, Anran and Wang, Kailong and Dong, Jin Song and Pu, Geguang}, title = {Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8049-8058} }
EvoPrompt-ReID: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification: Yuanlin He,

Zhenchuan Wang,

Jun Chen,

Yingying He,

Jiabao Wang,

Weiwen Wang,

Kun Xu,

Zijin Zhou,

Xiaoxiao Wang,

Mingju Chen,

Tingting Liu,

Zhisong Pan; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Yuanlin and Wang, Zhenchuan and Chen, Jun and He, Yingying and Wang, Jiabao and Wang, Weiwen and Xu, Kun and Zhou, Zijin and Wang, Xiaoxiao and Chen, Mingju and Liu, Tingting and Pan, Zhisong}, title = {EvoPrompt-ReID: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6758-6767} }
Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction: Xian-Hua Han; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Xian-Hua}, title = {Stability and Non-Local Modeling in Hybrid Convolution-Transformer Networks for Snapshot Hyperspectral Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1294-1303} }
Beyond Syntax: Action Semantics Learning for App Agents: Bohan Tang,

Dezhao Luo,

Jianheng Liu,

Jingxuan Chen,

Shaogang Gong,

Jianye Hao,

Jun Wang,

Kun Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Bohan and Luo, Dezhao and Liu, Jianheng and Chen, Jingxuan and Gong, Shaogang and Hao, Jianye and Wang, Jun and Shao, Kun}, title = {Beyond Syntax: Action Semantics Learning for App Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9444-9454} }
Weakly-Supervised Referring Video Object Segmentation Through Text Supervision: Miaojing Shi,

Jun Huang,

Zijie Yue,

Hanli Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Miaojing and Huang, Jun and Yue, Zijie and Wang, Hanli}, title = {Weakly-Supervised Referring Video Object Segmentation Through Text Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7461-7471} }
WGS: Watertight Geometry Standardization for Scalable 3D Generation: Dehao Hao,

Tanghui Jia,

Kaiyi Zhang,

Weikai Chen,

Zeyu Hu,

Yingda Yin,

Runze Zhang,

Lingting Zhu,

Li Yuan,

Xin Wang,

Long Quan; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR, author = {Hao, Dehao and Jia, Tanghui and Zhang, Kaiyi and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Zhang, Runze and Zhu, Lingting and Yuan, Li and Wang, Xin and Quan, Long}, title = {WGS: Watertight Geometry Standardization for Scalable 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {569-578} }
Don't Let the Information Slip Away: Taozhe Li,

Guansu Wang,

Bo Yu,

Yiming Liu,

Wei Sun; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Taozhe and Wang, Guansu and Yu, Bo and Liu, Yiming and Sun, Wei}, title = {Don't Let the Information Slip Away}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8504-8513} }
Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning: Ryuki Tezuka,

Chihiro Nakatani,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tezuka_2026_CVPR, author = {Tezuka, Ryuki and Nakatani, Chihiro and Ukita, Norimichi}, title = {Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8215-8225} }
AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution: Cencen Liu,

Dongyang Zhang,

Wen Yin,

Jielei Wang,

Tianyu Li,

Ji Guo,

Wenbo Jiang,

Guoqing Wang,

Guoming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Cencen and Zhang, Dongyang and Yin, Wen and Wang, Jielei and Li, Tianyu and Guo, Ji and Jiang, Wenbo and Wang, Guoqing and Lu, Guoming}, title = {AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5054-5063} }
ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning: Boran Wang,

Xinming Wang,

Yi Chen,

Xiang Li,

Jian Xu,

Jing Yuan,

Cheng-Lin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Boran and Wang, Xinming and Chen, Yi and Li, Xiang and Xu, Jian and Yuan, Jing and Liu, Cheng-Lin}, title = {ChartAgent: A Chart Understanding Framework with Tool Integrated Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2773-2782} }
Dual Strategies for Test-Time Adaptation: Nam Nguyen Phuong,

Duc Nguyen The Minh,

Phi Le Nguyen,

Ehsan Abbasnejad,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phuong_2026_CVPR, author = {Phuong, Nam Nguyen and Minh, Duc Nguyen The and Le Nguyen, Phi and Abbasnejad, Ehsan and Hoai, Minh}, title = {Dual Strategies for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2483-2492} }
Less is More: Token-Efficient Video-QA via Adaptive Frame-Pruning and Semantic Graph Integration: Shaoguang Wang,

Weiyu Guo,

Ziyang Chen,

Yijie Xu,

Xuming Hu,

Hui Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Shaoguang and Guo, Weiyu and Chen, Ziyang and Xu, Yijie and Hu, Xuming and Xiong, Hui}, title = {Less is More: Token-Efficient Video-QA via Adaptive Frame-Pruning and Semantic Graph Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9856-9866} }
HiVid-Narrator: Hierarchical Video Narrative Generation with Scene-Primed ASR-anchored Compression: Haoxuan Li,

Mengyan Li,

Junjun Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Haoxuan and Li, Mengyan and Zheng, Junjun}, title = {HiVid-Narrator: Hierarchical Video Narrative Generation with Scene-Primed ASR-anchored Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8195-8204} }
Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection: Xiao Guo,

Yue Zhang,

Mohit Bansal,

Xiaoming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Xiao and Zhang, Yue and Bansal, Mohit and Liu, Xiaoming}, title = {Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4898-4908} }
CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection: Haitian Yang,

Juan Fang,

Yiren Zhu,

Xudong Zhao,

Yufei Guo,

Xiaohan Zhang,

Xiaoxing Hu,

Xue Yang,

Qi Ming; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Haitian and Fang, Juan and Zhu, Yiren and Zhao, Xudong and Guo, Yufei and Zhang, Xiaohan and Hu, Xiaoxing and Yang, Xue and Ming, Qi}, title = {CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6361-6370} }
GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting: Ahmed Tawfik Aboukhadra,

Marcel Rogge,

Nadia Robertini,

Abdalla Arafa,

Jameel Malik,

Ahmed Elhayek,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aboukhadra_2026_CVPR, author = {Aboukhadra, Ahmed Tawfik and Rogge, Marcel and Robertini, Nadia and Arafa, Abdalla and Malik, Jameel and Elhayek, Ahmed and Stricker, Didier}, title = {GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3394-3404} }
CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion: Akshit Jindal,

Saket Anand,

Chetan Arora,

Vikram Goyal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jindal_2026_CVPR, author = {Jindal, Akshit and Anand, Saket and Arora, Chetan and Goyal, Vikram}, title = {CLIP-Inspector: Model-Level Backdoor Detection for Prompt-Tuned CLIP via OOD Trigger Inversion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {716-725} }
Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection: Jielun Peng,

Yabin Wang,

Yaqi Li,

Long Kong,

Xiaopeng Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Jielun and Wang, Yabin and Li, Yaqi and Kong, Long and Hong, Xiaopeng}, title = {Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6655-6666} }
RecycleLoRA: Rank-Revealing QR-Based Dual-LoRA Subspace Adaptation for Domain Generalized Semantic Segmentation: Chanseul Cho,

Seokju Yun,

Jaesung Jun,

Seungjae Moon,

Youngmin Ro; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR, author = {Cho, Chanseul and Yun, Seokju and Jun, Jaesung and Moon, Seungjae and Ro, Youngmin}, title = {RecycleLoRA: Rank-Revealing QR-Based Dual-LoRA Subspace Adaptation for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7503-7513} }
Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation: Yishu Zhang,

Shushan Wu,

Zhenzhong Zhang,

Didong Li,

Huaxiu Yao,

Yun Li,

Iain Carmichael,

Katherine A Hoadley,

Hongtu Zhu,

Di Wu,

Daiwei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yishu and Wu, Shushan and Zhang, Zhenzhong and Li, Didong and Yao, Huaxiu and Li, Yun and Carmichael, Iain and Hoadley, Katherine A. and Zhu, Hongtu and Wu, Di and Zhang, Daiwei}, title = {Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5201-5213} }
The Mechanics of CNN Filtering with Rectification: Liam Frija-Altarac,

Matthew Toews; [pdf] [supp]
[bibtex]
@InProceedings{Frija-Altarac_2026_CVPR, author = {Frija-Altarac, Liam and Toews, Matthew}, title = {The Mechanics of CNN Filtering with Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1690-1699} }
PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors: Haiyang Jiang,

Huiqin Zhang,

Yanduo Zhang,

Jiayi Ma,

Junjun Jiang,

Huabing Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haiyang and Zhang, Huiqin and Zhang, Yanduo and Ma, Jiayi and Jiang, Junjun and Zhou, Huabing}, title = {PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4993-5002} }
AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging: Zuchi Bazarvaani,

Seung-Ho Lee,

Jeongmin Ahn,

Donghyeon Jeon,

Inho Kang,

Seung-Hoon Na; [pdf]
[bibtex]
@InProceedings{Bazarvaani_2026_CVPR, author = {Bazarvaani, Zuchi and Lee, Seung-Ho and Ahn, Jeongmin and Jeon, Donghyeon and Kang, Inho and Na, Seung-Hoon}, title = {AlphaMerging: Orthogonal Subspace Projection of Task Vectors to Reduce Task Interference for Multi-Task Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2700-2709} }
Anticipatory Planning for Multimodal AI Agents: Yongyuan Liang,

Shijie Zhou,

Yu Gu,

Hao Tan,

Gang Wu,

Franck Dernoncourt,

Jihyung Kil,

Ryan A. Rossi,

Ruiyi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Yongyuan and Zhou, Shijie and Gu, Yu and Tan, Hao and Wu, Gang and Dernoncourt, Franck and Kil, Jihyung and Rossi, Ryan A. and Zhang, Ruiyi}, title = {Anticipatory Planning for Multimodal AI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5925-5935} }
Learning to Select, Learning to Judge: Active Preference Alignment for Mars Terrain Segmentation: JunJie Li,

Miyu Li,

Jiawei Wang,

Yu Liu,

Yumei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, JunJie and Li, Miyu and Wang, Jiawei and Liu, Yu and Wang, Yumei}, title = {Learning to Select, Learning to Judge: Active Preference Alignment for Mars Terrain Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8887-8896} }
PosterGen: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent LLMs: Zhilin Zhang,

Xiang Zhang,

Jiaqi Wei,

Yiwei Xu,

Chenyu You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhilin and Zhang, Xiang and Wei, Jiaqi and Xu, Yiwei and You, Chenyu}, title = {PosterGen: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9813-9823} }
Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation: Jiaxuan Zhang,

Qianqian Xu,

Peisong Wen,

Siran Dai,

Yang Liu,

Qingming Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiaxuan and Xu, Qianqian and Wen, Peisong and Dai, Siran and Liu, Yang and Huang, Qingming}, title = {Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7924-7934} }
ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation: Kwanyoung Lee,

Hyunwoo Oh,

SeungJu Cha,

Sungho Koh,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Kwanyoung and Oh, Hyunwoo and Cha, SeungJu and Koh, Sungho and Kim, Dong-Jin}, title = {ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4562-4571} }
Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation: Mian Muhammad Naeem Abid,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Abid_2026_CVPR, author = {Abid, Mian Muhammad Naeem and Timofte, Radu}, title = {Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7293-7303} }
Re^2MoGen: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement: Jiakun Zheng,

Ting Xiao,

Shiqin Cao,

Xinran Li,

Zhe Wang,

Chenjia Bai; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jiakun and Xiao, Ting and Cao, Shiqin and Li, Xinran and Wang, Zhe and Bai, Chenjia}, title = {Re{$^2$}MoGen: Open-Vocabulary Motion Generation via LLM Reasoning and Physics-Aware Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1378-1387} }
Fast Generative DeOcclusion for Visual Geometry and Robotics: Jieneng Chen,

Tiezheng Zhang,

Xiwei Xuan,

Ju He,

Yifan Yin,

Haojun Shi,

Suyu Ye,

Xinyi Li,

Ruisheng Yuan,

Tianmin Shu,

Alan Yuille; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Jieneng and Zhang, Tiezheng and Xuan, Xiwei and He, Ju and Yin, Yifan and Shi, Haojun and Ye, Suyu and Li, Xinyi and Yuan, Ruisheng and Shu, Tianmin and Yuille, Alan}, title = {Fast Generative DeOcclusion for Visual Geometry and Robotics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1314-1324} }
REBA: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection: Chengxi Chu,

Nurul Japar,

Chee Kau Lim; [pdf]
[bibtex]
@InProceedings{Chu_2026_CVPR, author = {Chu, Chengxi and Japar, Nurul and Lim, Chee Kau}, title = {REBA: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8280-8290} }
Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning: Shuai Feng,

Yuxin Ge,

Baoming Zhang,

Yuntao Du,

MingCai Chen,

Chongjun Wang,

Lei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Shuai and Ge, Yuxin and Zhang, Baoming and Du, Yuntao and Chen, MingCai and Wang, Chongjun and Feng, Lei}, title = {Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6602-6611} }
ZeroDiff++: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning: Qin Li,

Qi Li,

Limei Liu,

Junfeng Yang,

Han Peng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Qin and Li, Qi and Liu, Limei and Yang, Junfeng and Peng, Han}, title = {ZeroDiff++: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6997-7007} }
Scaling Pre-training to One Hundred Billion Data for Vision Language Models: Xiao Wang,

Ibrahim Alabdulmohsin,

Daniel Salz,

Zhe Li,

Keran Rong,

Xiaohua Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xiao and Alabdulmohsin, Ibrahim and Salz, Daniel and Li, Zhe and Rong, Keran and Zhai, Xiaohua}, title = {Scaling Pre-training to One Hundred Billion Data for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6185-6196} }
LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map: Wei Luo,

Xiaohan Wang,

Yuehu Liu; [pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Wei and Wang, Xiaohan and Liu, Yuehu}, title = {LP3: LLM-based Potential Prediction Policy for Object Navigation using a Scene-Object Semantic Map}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1462-1471} }
Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation: Yi Yang,

Qiang Jiao,

Mengrui Shi,

Qiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Jiao, Qiang and Shi, Mengrui and Zhang, Qiang}, title = {Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7378-7387} }
VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction: Yu Hu,

Chong Cheng,

Sicheng Yu,

Xiaoyang Guo,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Yu and Cheng, Chong and Yu, Sicheng and Guo, Xiaoyang and Wang, Hao}, title = {VGGT4D: Mining Motion Cues in Visual Geometry Transformers for 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {414-424} }
V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think: Bingda Tang,

Yuhui Zhang,

Xiaohan Wang,

Jiayuan Mao,

Ludwig Schmidt,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Bingda and Zhang, Yuhui and Wang, Xiaohan and Mao, Jiayuan and Schmidt, Ludwig and Yeung-Levy, Serena}, title = {V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3769-3778} }
SocialMirror: Reconstructing 3D Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance: Qi Xia,

Peishan Cong,

Ziyi Wang,

Yujing Sun,

Qin Sun,

Xinge Zhu,

Mao Ye,

Ruigang Yang,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Qi and Cong, Peishan and Wang, Ziyi and Sun, Yujing and Sun, Qin and Zhu, Xinge and Ye, Mao and Yang, Ruigang and Ma, Yuexin}, title = {SocialMirror: Reconstructing 3D Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3510-3520} }
Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams: Kaiyuan Chen,

Shuangyu Xie,

Andrew Goldberg,

Ken Goldberg; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Kaiyuan and Xie, Shuangyu and Goldberg, Andrew and Goldberg, Ken}, title = {Learning Multi-Task Robot Trajectory Segmentation from Visual and Kinematic Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1452-1461} }
Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition: Gurucharan Srinivas,

Joshua Niemeijer,

Frank Köster; [pdf] [supp]
[bibtex]
@InProceedings{Srinivas_2026_CVPR, author = {Srinivas, Gurucharan and Niemeijer, Joshua and K{\"o}ster, Frank}, title = {Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7122-7131} }
Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection: Xiaowei Zhao,

Zhide Liu,

Yuqing Ma,

Xianglong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xiaowei and Liu, Zhide and Ma, Yuqing and Liu, Xianglong}, title = {Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9357-9367} }
Exploiting the Source-Asymmetry Confidence Gap for Generalizable AI-Generated Image Detection: Ziyang Zheng,

Weiyan Chen,

Yao Xiao,

Zijie Cao,

Dongyu Zhang,

Pengxu Wei; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Ziyang and Chen, Weiyan and Xiao, Yao and Cao, Zijie and Zhang, Dongyu and Wei, Pengxu}, title = {Exploiting the Source-Asymmetry Confidence Gap for Generalizable AI-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8715-8724} }
Object Pose Transformer: Unifying Unseen Object Pose Estimation: Weihang Li,

Lorenzo Garattoni,

Fabien Despinoy,

Nassir Navab,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Weihang and Garattoni, Lorenzo and Despinoy, Fabien and Navab, Nassir and Busam, Benjamin}, title = {Object Pose Transformer: Unifying Unseen Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {436-446} }
Materialistic RIR: Material Conditioned Realistic RIR Generation: Mahnoor Fatima Saad,

Sagnik Majumder,

Kristen Grauman,

Ziad Al-Halah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saad_2026_CVPR, author = {Saad, Mahnoor Fatima and Majumder, Sagnik and Grauman, Kristen and Al-Halah, Ziad}, title = {Materialistic RIR: Material Conditioned Realistic RIR Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5871-5881} }
Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem: Dawei Su,

Zhanhong Fang,

Junyi Luo,

Debing Wang,

Jinbiao Chen,

Zizhen Zhang; [pdf]
[bibtex]
@InProceedings{Su_2026_CVPR, author = {Su, Dawei and Fang, Zhanhong and Luo, Junyi and Wang, Debing and Chen, Jinbiao and Zhang, Zizhen}, title = {Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6261-6270} }
IntentEdit: Multi-Agent Reasoning for Intent-Driven Complex Image Editing: Yuxuan Zhang,

Shijia Huang,

Liwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuxuan and Huang, Shijia and Wang, Liwei}, title = {IntentEdit: Multi-Agent Reasoning for Intent-Driven Complex Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8776-8785} }
Value bounds and Convergence Analysis for Averages of LRP attributions: Alexander Binder,

Nastaran Takmil-Homayouni,

Urun Dogan; [pdf] [supp]
[bibtex]
@InProceedings{Binder_2026_CVPR, author = {Binder, Alexander and Takmil-Homayouni, Nastaran and Dogan, Urun}, title = {Value bounds and Convergence Analysis for Averages of LRP attributions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3343-3353} }
Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance: Junyang Chen,

Haomin Ni,

Hanjiang Lai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Junyang and Ni, Haomin and Lai, Hanjiang}, title = {Optimizing Certified Radius of Zero-shot Composed Image Retrieval via Text Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {819-828} }
DepthScopy: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions: Minghai Shi,

Xiaoxian Zhang,

Xiaoyue Liu,

Fan Yang,

Lei Li; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Minghai and Zhang, Xiaoxian and Liu, Xiaoyue and Yang, Fan and Li, Lei}, title = {DepthScopy: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5265-5274} }
GaussFiller: Unleashing VLM-Expert Guidance for 3D Scene Completion with 3D Gaussian Splatting: Yuhan Ping,

Cheng Lin,

Yuan Liu,

Zhiyang Dou,

Jia Pan,

Wenping Wang; [pdf]
[bibtex]
@InProceedings{Ping_2026_CVPR, author = {Ping, Yuhan and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Pan, Jia and Wang, Wenping}, title = {GaussFiller: Unleashing VLM-Expert Guidance for 3D Scene Completion with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7132-7142} }
DGD: Density Gradient-guided Diffusion for Long-Tailed Clustering: Xulun Ye,

Yuanyuan Deng,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Xulun and Deng, Yuanyuan and Zhou, Kun}, title = {DGD: Density Gradient-guided Diffusion for Long-Tailed Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7654-7664} }
SFS-DETR: Spatial-Frequency Selection for UAV Object Detection: Dingding Jia,

Jiankang Wang,

Longlong Zhang,

Zhiheng Liu,

Xuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Dingding and Wang, Jiankang and Zhang, Longlong and Liu, Zhiheng and Wang, Xuan}, title = {SFS-DETR: Spatial-Frequency Selection for UAV Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6582-6591} }
UniShield: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization: Qing Huang,

Zhipei Xu,

Xuanyu Zhang,

Xiangyu Yu,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian}, title = {UniShield: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8121-8132} }
Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers: Sanghyeok Nam,

Byoungjun Kim,

Daehyung Park,

Tae-Kyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2026_CVPR, author = {Nam, Sanghyeok and Kim, Byoungjun and Park, Daehyung and Kim, Tae-Kyun}, title = {Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3698-3708} }
Another BRIXEL in the Wall: Towards Cheaper Dense Features: Alexander Lappe,

Martin A. Giese; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lappe_2026_CVPR, author = {Lappe, Alexander and Giese, Martin A.}, title = {Another BRIXEL in the Wall: Towards Cheaper Dense Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7605-7614} }
Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining: Jie Xu,

Na Zhao; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Jie and Zhao, Na}, title = {Stream3D: Streaming Zero-Shot 3D Instance Segmentation with Multi-View Noise Mask Filtering and Manifold Refining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {327-337} }
Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments: Chihiro Noguchi,

Takaki Yamamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noguchi_2026_CVPR, author = {Noguchi, Chihiro and Yamamoto, Takaki}, title = {Pseudo-Expert Regularized Offline RL for End-to-End Autonomous Driving in Photorealistic Closed-Loop Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1096-1105} }
Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing: Hao Shao,

Liyang Liu,

Zhengxiong Luo,

Zhuofan Zong,

Hongsheng Li; [pdf]
[bibtex]
@InProceedings{Shao_2026_CVPR, author = {Shao, Hao and Liu, Liyang and Luo, Zhengxiong and Zong, Zhuofan and Li, Hongsheng}, title = {Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3893-3902} }
Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media: Megha Mariam K.M,

Vineeth N. Balasubramanian,

C.V. Jawahar; [pdf] [supp]
[bibtex]
@InProceedings{K.M_2026_CVPR, author = {K.M, Megha Mariam and Balasubramanian, Vineeth N. and Jawahar, C. V.}, title = {Unifying Scientific Communication: Fine-Grained Correspondence Across Scientific Media}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2079-2088} }
Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering: Powei Liao,

Jiro Abe,

Kazumine Ogura; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR, author = {Liao, Powei and Abe, Jiro and Ogura, Kazumine}, title = {Point2Gaussian: Point-Cloud-to-Gaussian Conversion for Efficient 3D Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {109-118} }
VIDEOP2R: Video Understanding from Perception to Reasoning: Yifan Jiang,

Yueying Wang,

Rui Zhao,

Toufiq Parag,

Zhimin Chen,

Zhenyu Liao,

Jayakrishnan Unnikrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yifan and Wang, Yueying and Zhao, Rui and Parag, Toufiq and Chen, Zhimin and Liao, Zhenyu and Unnikrishnan, Jayakrishnan}, title = {VIDEOP2R: Video Understanding from Perception to Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8303-8313} }
DA-CLIP: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment: Jianqin Liu,

Peng Wang,

Junming Huang,

Xue Zhou,

Li Yu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Jianqin and Wang, Peng and Huang, Junming and Zhou, Xue and Yu, Li}, title = {DA-CLIP: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6062-6071} }
Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution: Yuexin Wang,

Xiaolei Wang,

Guangliang Cheng,

Huihui Bai,

Tammam Tillo,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yuexin and Wang, Xiaolei and Cheng, Guangliang and Bai, Huihui and Tillo, Tammam and Xiao, Jimin}, title = {Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8836-8845} }
Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration: Yujie Chen,

Haotong Qin,

Zhang Zhang,

Michele Magno,

Luca Benini,

Yawei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yujie and Qin, Haotong and Zhang, Zhang and Magno, Michele and Benini, Luca and Li, Yawei}, title = {Q-MambaIR: Accurate Quantized Mamba for Efficient Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2524-2533} }
Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP: Yusung Ro,

Jaehyun Choi,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ro_2026_CVPR, author = {Ro, Yusung and Choi, Jaehyun and Kim, Junmo}, title = {Beyond Semantics: Disentangling Information Scope in Sparse Autoencoders for CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3252-3261} }
Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs: Rongqian Chen,

Allison Andreyev,

Yanming Xiu,

Joshua Chilukuri,

Shunav Sen,

Mahdi Imani,

Bin Li,

Maria Gorlatova,

Gang Tan,

Tian Lan; [pdf]
[bibtex]
@InProceedings{chen_2026_CVPR, author = {Chen, Rongqian and Andreyev, Allison and Xiu, Yanming and Chilukuri, Joshua and Sen, Shunav and Imani, Mahdi and Li, Bin and Gorlatova, Maria and Tan, Gang and Lan, Tian}, title = {Cognitive Attack Detection in Augmented Reality (CADAR): A Neuro-Symbolic Approach with Particle Filtering on Perception Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {799-808} }
WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting: Lezhong Wang,

Mehmet Onurcan Kaya,

Siavash Arjomand Bigdeli,

Jeppe Revall Frisvad; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Lezhong and Kaya, Mehmet Onurcan and Bigdeli, Siavash Arjomand and Frisvad, Jeppe Revall}, title = {WildRelight: A Real-World Dataset and Benchmark for Single-Image Relighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2007-2016} }
PrismPrune: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in VLMs: Ziniu Liu,

Shuheng Zhou,

Mingqing Liu,

Hao Deng,

Huijia Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Ziniu and Zhou, Shuheng and Liu, Mingqing and Deng, Hao and Zhu, Huijia}, title = {PrismPrune: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6174-6184} }
From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models: Lysa Xiao,

Veronica Liesaputra,

Lech Szymanski,

Stephen Cranefield; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Lysa and Liesaputra, Veronica and Szymanski, Lech and Cranefield, Stephen}, title = {From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5882-5891} }
UMI-HOI: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection: Yuankai Wu,

Zhinan Li,

Constantin Patsch,

Marsil Zakour,

Driton Salihu,

Eckehard Steinbach; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Yuankai and Li, Zhinan and Patsch, Constantin and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard}, title = {UMI-HOI: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5999-6008} }
Context-Aware Semantic Segmentation via Stage-Wise Attention: Antoine Carreaud,

Elias Naha,

Arthur Chansel,

Nina Lahellec,

Jan Skaloud,

Adrien Gressin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Carreaud_2026_CVPR, author = {Carreaud, Antoine and Naha, Elias and Chansel, Arthur and Lahellec, Nina and Skaloud, Jan and Gressin, Adrien}, title = {Context-Aware Semantic Segmentation via Stage-Wise Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2680-2690} }
Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach: Xincheng Wang,

Hanchi Sun,

Wenjun Sun,

Kejun Xue,

Wangqiu Zhou,

Jianbo Zhang,

Wei Sun,

Dandan Zhu,

Xiongkuo Min,

Jun Jia,

Zhijun Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xincheng and Sun, Hanchi and Sun, Wenjun and Xue, Kejun and Zhou, Wangqiu and Zhang, Jianbo and Sun, Wei and Zhu, Dandan and Min, Xiongkuo and Jia, Jun and Fang, Zhijun}, title = {Evaluating Dataset Watermarking for Fine-Tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2230-2239} }
Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts: Yongqi Yang,

Yuke Li,

Heng Huang,

Zhihui Li,

Bo Du,

Yu Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Yongqi and Li, Yuke and Huang, Heng and Li, Zhihui and Du, Bo and Wu, Yu}, title = {Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8019-8028} }
Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning: Yifei Gao,

Ning Xu,

Guoqing Jin,

Shenyuan Zhang,

An-An Liu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yifei and Xu, Ning and Jin, Guoqing and Zhang, Shenyuan and Liu, An-An},
  title     = {Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5755--5764},
}
Paper2SysArch: Structure-Constrained System Architecture Generation from Scientific Papers: Ziyi Guo,

Zhou Liu,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyi and Liu, Zhou and Zhang, Wentao},
  title     = {{Paper2SysArch}: Structure-Constrained System Architecture Generation from Scientific Papers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1996--2006},
}
OutSafe-Bench: A Benchmark for Multimodal Offensive Content Detection in Large Language Models: Yuping Yan,

Yuhan Xie,

Yuanshuai Li,

Yingchao Yu,

Lingjuan Lyu,

Yaochu Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yuping and Xie, Yuhan and Li, Yuanshuai and Yu, Yingchao and Lyu, Lingjuan and Jin, Yaochu},
  title     = {{OutSafe-Bench}: A Benchmark for Multimodal Offensive Content Detection in Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1965--1975},
}
ProGIC: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization: Hao Cao,

Chengbin Liang,

Wenqi Guo,

Zhijin Qin,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Hao and Liang, Chengbin and Guo, Wenqi and Qin, Zhijin and Han, Jungong},
  title     = {{ProGIC}: Progressive and Lightweight Generative Image Compression with Residual Vector Quantization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2915--2925},
}
Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking: Jingcheng Yang,

Tianhu Xiong,

Shengyi Qian,

Klara Nahrstedt,

Mingyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jingcheng and Xiong, Tianhu and Qian, Shengyi and Nahrstedt, Klara and Wu, Mingyuan},
  title     = {Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3322--3331},
}
ASTRA: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding: Tianze Xia,

Zijian Ning,

Zonglin Zhao,

Mingjia Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Tianze and Ning, Zijian and Zhao, Zonglin and Wang, Mingjia},
  title     = {{ASTRA}: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3820--3829},
}
In2CLR: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection: Yunxuan Li,

Bohao Liu,

Yanxia Wu,

Rongsheng Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yunxuan and Liu, Bohao and Wu, Yanxia and Li, Rongsheng},
  title     = {{In2CLR}: Joint Intra-Inter Curriculum Learning with Review for Degraded Fake Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2398--2407},
}
Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator: Xiankang He,

Dongyan Guo,

Hongji Li,

Ying Cui,

Libo Weng,

Ruibo Li,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xiankang and Guo, Dongyan and Li, Hongji and Cui, Ying and Weng, Libo and Li, Ruibo and Zhang, Chi},
  title     = {Distill Any Depth: Distillation Creates a Stronger Monocular Depth Estimator},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {591--601},
}
FA-MoE: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts: Yifan Sun,

Qingjie Meng,

Tao Chen,

Huiping Chen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yifan and Meng, Qingjie and Chen, Tao and Chen, Huiping},
  title     = {{FA-MoE}: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3988--3997},
}
PlanGS: Active 3D Gaussian Reconstruction with Real-Time Planning: Wenxiang Xie,

Anpei Chen,

Haoming Yu,

Yujun Shen,

Weiwei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Wenxiang and Chen, Anpei and Yu, Haoming and Shen, Yujun and Xu, Weiwei},
  title     = {{PlanGS}: Active {3D} {Gaussian} Reconstruction with Real-Time Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3156--3166},
}
Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection: Jimin Roh,

Dongkyu Kim,

Suk-Ju Kang; [pdf] [supp]
[bibtex]
@InProceedings{Roh_2026_CVPR,
  author    = {Roh, Jimin and Kim, Dongkyu and Kang, Suk-Ju},
  title     = {Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6218--6227},
}
Once for All: An End-to-End Paradigm for VLM-Based Domain-Generalized Object Detection: Peng Zhang,

Xiang Yuan,

Cong Li,

Junwei Han,

Gong Cheng; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peng and Yuan, Xiang and Li, Cong and Han, Junwei and Cheng, Gong},
  title     = {Once for All: An End-to-End Paradigm for {VLM}-Based Domain-Generalized Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6829--6838},
}
Image Classification Using CNN-QNN Hybrid Model with Optimized Correlated Features: Minseo Seong,

Youngwook Kim; [pdf]
[bibtex]
@InProceedings{Seong_2026_CVPR,
  author    = {Seong, Minseo and Kim, Youngwook},
  title     = {Image Classification Using {CNN-QNN} Hybrid Model with Optimized Correlated Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2473--2482},
}
Di3PO - Diptych Diffusion DPO for Targeted Improvements in Image Generation: Sanjana Reddy,

Ishaan Malhi,

Sally Ma,

Praneet Dutta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reddy_2026_CVPR,
  author    = {Reddy, Sanjana and Malhi, Ishaan and Ma, Sally and Dutta, Praneet},
  title     = {{Di3PO} - Diptych Diffusion {DPO} for Targeted Improvements in Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8868--8876},
}
VRAG-DFD: Verifiable Retrieval-Augmentation for MLLM-based Deepfake Detection: Hui Han,

Shunli Wang,

Yandan Zhao,

Taiping Yao,

Shouhong Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Hui and Wang, Shunli and Zhao, Yandan and Yao, Taiping and Ding, Shouhong},
  title     = {{VRAG-DFD}: Verifiable Retrieval-Augmentation for {MLLM}-based Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9552--9562},
}
FlowC2S: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation: Hovhannes Margaryan,

Quentin Bammey,

Christian Sandor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Margaryan_2026_CVPR,
  author    = {Margaryan, Hovhannes and Bammey, Quentin and Sandor, Christian},
  title     = {{FlowC2S}: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3861--3872},
}
Fast Kernel-Space Diffusion for Remote Sensing Pansharpening: Hancong Jin,

Zihan Cao,

Liang-Jian Deng,

Jingjing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Hancong and Cao, Zihan and Deng, Liang-Jian and Li, Jingjing},
  title     = {Fast Kernel-Space Diffusion for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6291--6301},
}
Name That Part: 3D Part Segmentation and Naming: Soumava Paul,

Prakhar Kaushik,

Ankit Vaidya,

Anand Bhattad,

Alan Yuille; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paul_2026_CVPR,
  author    = {Paul, Soumava and Kaushik, Prakhar and Vaidya, Ankit and Bhattad, Anand and Yuille, Alan},
  title     = {Name That Part: {3D} Part Segmentation and Naming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1808--1817},
}
DebFilter: Eradicating Biases Stashed in Value: Seung Hyuk Lee,

Songkuk Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seung Hyuk and Kim, Songkuk},
  title     = {{DebFilter}: Eradicating Biases Stashed in Value},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4790--4799},
}
Video Parallel Scaling: Aggregating Diverse Frame Subsets for VideoLLMs: Hyungjin Chung,

Hyelin Nam,

Jiyeon Kim,

Hyojun Go,

Byeongjun Park,

Junho Kim,

Joonseok Lee,

Seongsu Ha,

Byung-Hoon Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR,
  author    = {Chung, Hyungjin and Nam, Hyelin and Kim, Jiyeon and Go, Hyojun and Park, Byeongjun and Kim, Junho and Lee, Joonseok and Ha, Seongsu and Kim, Byung-Hoon},
  title     = {Video Parallel Scaling: Aggregating Diverse Frame Subsets for {VideoLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8972--8982},
}
FinChart-Multimodal: A Dataset for Context-Injected Financial Chart Understanding with Aligned OHLCV Time Series: Devansh Garg; [pdf]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Devansh},
  title     = {{FinChart-Multimodal}: A Dataset for Context-Injected Financial Chart Understanding with Aligned {OHLCV} Time Series},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1934--1942},
}
MAE-XNT: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with X-Ray Nanotomography: Alfred Laugros,

Sebastien Roig,

Alexandra Pacureanu; [pdf] [supp]
[bibtex]
@InProceedings{Laugros_2026_CVPR,
  author    = {Laugros, Alfred and Roig, Sebastien and Pacureanu, Alexandra},
  title     = {{MAE-XNT}: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with {X-Ray} Nanotomography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5388--5398},
}
Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning: Mohamed Harmanani,

Bining Long,

Zhuoxin Guo,

Paul F.R. Wilson,

Amirhossein Sabour,

Minh Nguyen Nhat To,

Gabor Fichtinger,

Purang Abolmaesumi,

Parvin Mousavi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Harmanani_2026_CVPR,
  author    = {Harmanani, Mohamed and Long, Bining and Guo, Zhuoxin and Wilson, Paul F. R. and Sabour, Amirhossein and To, Minh Nguyen Nhat and Fichtinger, Gabor and Abolmaesumi, Purang and Mousavi, Parvin},
  title     = {Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5589--5598},
}
Intelligent Photo Retouching with Language Model-Based Artist Agents: Haoyu Chen,

Keda Tao,

YiZao Wang,

Xinlei Wang,

Lei Zhu,

Jinjin Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haoyu and Tao, Keda and Wang, YiZao and Wang, Xinlei and Zhu, Lei and Gu, Jinjin},
  title     = {Intelligent Photo Retouching with Language Model-Based Artist Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1240--1251},
}
NaiLIA: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries: Kanon Amemiya,

Daichi Yashima,

Kei Katsumata,

Takumi Komatsu,

Ryosuke Korekata,

Seitaro Otsuki,

Komei Sugiura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amemiya_2026_CVPR,
  author    = {Amemiya, Kanon and Yashima, Daichi and Katsumata, Kei and Komatsu, Takumi and Korekata, Ryosuke and Otsuki, Seitaro and Sugiura, Komei},
  title     = {{NaiLIA}: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9034--9044},
}
CREM: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension: Lihao Liu,

Biao Yang,

Yan Wang,

Da Li,

Jiangxia Cao,

Yuxiao Luo,

Xiang Chen,

Xiangyu Wu,

Wei Yuan,

Fan Yang,

Guiguang Ding,

Tingting Gao,

Guorui Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lihao and Yang, Biao and Wang, Yan and Li, Da and Cao, Jiangxia and Luo, Yuxiao and Chen, Xiang and Wu, Xiangyu and Yuan, Wei and Yang, Fan and Ding, Guiguang and Gao, Tingting and Zhou, Guorui},
  title     = {{CREM}: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5714--5724},
}
AITP: Traffic Accident Responsibility Allocation via Multimodal Large Language Models: Zijin Zhou,

Songan Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zijin and Zhang, Songan},
  title     = {{AITP}: Traffic Accident Responsibility Allocation via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9259--9268},
}
CheXmix: Unified Generative Pretraining for Vision Language Models in Medical Imaging: Ashwin Kumar,

Robbie Holland,

Corey Barrett,

Jangwon Kim,

Maya Varma,

Zhihong Chen,

Yunhe Gao,

Greg Zaharchuk,

Tara Taghavi,

Krishnaram Kenthapadi,

Akshay Chaudhari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashwin and Holland, Robbie and Barrett, Corey and Kim, Jangwon and Varma, Maya and Chen, Zhihong and Gao, Yunhe and Zaharchuk, Greg and Taghavi, Tara and Kenthapadi, Krishnaram and Chaudhari, Akshay},
  title     = {{CheXmix}: Unified Generative Pretraining for Vision Language Models in Medical Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9466--9476},
}
Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation: Chaochen Wu,

Meiyun Zuo,

Lei Xie; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chaochen and Zuo, Meiyun and Xie, Lei},
  title     = {Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5495--5504},
}
AnatomiX, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest X-Ray Interpretation: Anees Ur Rehman Hashmi,

Numan Saeed,

Christoph Lippert; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hashmi_2026_CVPR,
  author    = {Hashmi, Anees Ur Rehman and Saeed, Numan and Lippert, Christoph},
  title     = {{AnatomiX}, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest {X-Ray} Interpretation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6009--6018},
}
A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action: Kaidong Zhang,

Jian Zhang,

Rongtao Xu,

Yu Sun,

Youpeng Wen,

Shuoshuo Xue,

Xiaoyu Guo,

Minghao Guo,

Weijia Liufu,

Liu Zihou,

Kangyi Ji,

Zihang Li,

Ruiyi Chen,

Meng Cao,

Jingming Zhang,

Shen Zhao,

Xiaojun Chang,

Feng Zheng,

Ivan Laptev,

Xiaodan Liang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaidong and Zhang, Jian and Xu, Rongtao and Sun, Yu and Wen, Youpeng and Xue, Shuoshuo and Guo, Xiaoyu and Guo, Minghao and Liufu, Weijia and Zihou, Liu and Ji, Kangyi and Li, Zihang and Chen, Ruiyi and Cao, Meng and Zhang, Jingming and Zhao, Shen and Chang, Xiaojun and Zheng, Feng and Laptev, Ivan and Liang, Xiaodan},
  title     = {A1: Adaptive Truncated Vision-Language-Action Model from Affordance to Action},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1503--1514},
}
Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation: Jia Li,

Xiaomeng Fu,

Yizhao Gao,

Jiaxu Wang,

Xi Wang,

Hayden Kwok-Hay So; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Fu, Xiaomeng and Gao, Yizhao and Wang, Jiaxu and Wang, Xi and So, Hayden Kwok-Hay},
  title     = {Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4160--4169},
}
Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning: Minhyeok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3273--3281},
}
Entropy-Based Visual Re-perception Inference for Multimodal Models: Jia Liufu,

Qiangyu Yan,

Zhehan Kan,

Wenming Yang,

Hailin Hu,

Xinghao Chen,

Borui Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Liufu_2026_CVPR,
  author    = {Liufu, Jia and Yan, Qiangyu and Kan, Zhehan and Yang, Wenming and Hu, Hailin and Chen, Xinghao and Jiang, Borui},
  title     = {Entropy-Based Visual Re-perception Inference for Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9770--9779},
}
MotionDuet: Dual-Conditioned 3D Human Motion Generation with Video-Regularized Text Learning: Yi-Yang Zhang,

Tengjiao Sun,

Pengcheng Fang,

Deng-Bao Wang,

Xiaohao Cai,

Min-Ling Zhang,

Hansung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yi-Yang and Sun, Tengjiao and Fang, Pengcheng and Wang, Deng-Bao and Cai, Xiaohao and Zhang, Min-Ling and Kim, Hansung},
  title     = {{MotionDuet}: Dual-Conditioned {3D} Human Motion Generation with Video-Regularized Text Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3709--3718},
}
Is Prompt Selection Necessary for Task-Free Online Continual Learning?: Seoyoung Park,

Haemin Lee,

Hankook Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Seoyoung and Lee, Haemin and Lee, Hankook},
  title     = {Is Prompt Selection Necessary for Task-Free Online Continual Learning?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7883--7892},
}
3DFA: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization: Sizhe Song,

Yankuan Chi,

Shuhan Zhong,

S.-H. Gary Chan; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Sizhe and Chi, Yankuan and Zhong, Shuhan and Chan, S.-H. Gary},
  title     = {{3DFA}: Aligning the Features Between Point Cloud and Query Image for Scene-Specific Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {360--369},
}
Dyna-ViT: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers: Syeda Fiza Rubab,

Arslan Abdul Ghaffar,

Malik Junaid Jami Gul,

Sheriff Murtala,

Ingyu Lee,

Gyu Sang Choi; [pdf] [supp]
[bibtex]
@InProceedings{Rubab_2026_CVPR,
  author    = {Rubab, Syeda Fiza and Ghaffar, Arslan Abdul and Gul, Malik Junaid Jami and Murtala, Sheriff and Lee, Ingyu and Choi, Gyu Sang},
  title     = {{Dyna-ViT}: Parameter-Free Pre-Encoder Token Pruning for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2844--2851},
}
Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection: Hantao Zheng,

Ning Han,

Yawen Zeng,

Hegui Zhu,

Hao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hantao and Han, Ning and Zeng, Yawen and Zhu, Hegui and Chen, Hao},
  title     = {Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9045--9054},
}
ReMem: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection: Ling Yi,

Zhe Chen,

Gaochang Wu,

Jinliang Ding,

Xiaojie Wang,

Zhaolong Ning; [pdf]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ling and Chen, Zhe and Wu, Gaochang and Ding, Jinliang and Wang, Xiaojie and Ning, Zhaolong},
  title     = {{ReMem}: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7697--7705},
}
Phantasia: Context-Adaptive Backdoors in Vision Language Models: Nam Duong Tran,

Phi Le Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Nam Duong and Le Nguyen, Phi},
  title     = {{Phantasia}: Context-Adaptive Backdoors in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {695--704},
}
CLIPtone-GO: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment: Satyam Merothiya,

Chanda Grover Kamra,

Indra Deep Mastan; [pdf] [supp]
[bibtex]
@InProceedings{Merothiya_2026_CVPR,
  author    = {Merothiya, Satyam and Kamra, Chanda Grover and Mastan, Indra Deep},
  title     = {{CLIPtone-GO}: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8695--8704},
}
FedAR: Attribute-Guided Representation Learning for Heterogeneous Federated Learning: Mengjie Li,

Liu Yang,

Qi Shen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengjie and Yang, Liu and Shen, Qi},
  title     = {{FedAR}: Attribute-Guided Representation Learning for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6987--6996},
}
Model Merging on Loss Landscapes: A Geometric Perspective: Juanwu Lu,

Anand Bhaskar,

Brian Axelrod,

Ekaterina Tolstaya,

Tristan Emrich; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juanwu and Bhaskar, Anand and Axelrod, Brian and Tolstaya, Ekaterina and Emrich, Tristan},
  title     = {Model Merging on Loss Landscapes: A Geometric Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7644--7653},
}
Rethinking Whole-Body CT Image Interpretation: An Abnormality-Centric Approach: Ziheng Zhao,

Lisong Dai,

Ya Zhang,

Weidi Xie,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ziheng and Dai, Lisong and Zhang, Ya and Xie, Weidi and Wang, Yanfeng},
  title     = {Rethinking Whole-Body {CT} Image Interpretation: An Abnormality-Centric Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5179--5189},
}
DR-DPO: Dual-Regularized DPO for Efficient Dataset Condensation: Haiduo Huang,

Jiangcheng Song,

Yadong Zhang,

Guansu Wang,

Pengju Ren; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Wang, Guansu and Ren, Pengju},
  title     = {{DR-DPO}: Dual-Regularized {DPO} for Efficient Dataset Condensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2111--2120},
}
Efficient Document Parsing via Parallel Token Prediction: Lei Li,

Ze Zhao,

Meng Li,

Zhongwang Lun,

Yi Yuan,

Xingjing Lu,

Zheng Wei,

Jiang Bian,

Zang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lei and Zhao, Ze and Li, Meng and Lun, Zhongwang and Yuan, Yi and Lu, Xingjing and Wei, Zheng and Bian, Jiang and Li, Zang},
  title     = {Efficient Document Parsing via Parallel Token Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2763--2772},
}
PaLMR: Towards Faithful Visual Reasoning via Multimodal Process Alignment: Yantao Li,

Chenyang Yan,

Qiang Hui,

Fang Zhao,

Kanzhi Cheng,

Chao Tan,

Huanlin Gao,

Jianbing Zhang,

Kai Wang,

Xinyu Dai,

Shiguo Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yantao and Yan, Chenyang and Hui, Qiang and Zhao, Fang and Cheng, Kanzhi and Tan, Chao and Gao, Huanlin and Zhang, Jianbing and Wang, Kai and Dai, Xinyu and Lian, Shiguo},
  title     = {{PaLMR}: Towards Faithful Visual Reasoning via Multimodal Process Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6111--6121},
}
SAGA: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection: Yongchao Feng,

Ziyue Huang,

Jinqing Zhang,

Wenrui Cai,

Qingjie Liu; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yongchao and Huang, Ziyue and Zhang, Jinqing and Cai, Wenrui and Liu, Qingjie},
  title     = {{SAGA}: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7779--7788},
}
VEGAS: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering: Zihu Wang,

Boxun Xu,

Yuxuan Xia,

Peng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihu and Xu, Boxun and Xia, Yuxuan and Li, Peng},
  title     = {{VEGAS}: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9055--9064},
}
Texture-Guided Multiscale Cross-Modal Fusion for AI-Generated Image Quality Assessment: Qinlin Hu,

Mingliang Zhou,

Xingran Liao; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Qinlin and Zhou, Mingliang and Liao, Xingran},
  title     = {Texture-Guided Multiscale Cross-Modal Fusion for {AI}-Generated Image Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2629--2638},
}
Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning: Sungwon Woo,

Dongjun Hwang,

Shiwon Kim,

Junsuk Choe,

Jongho Nang; [pdf] [supp]
[bibtex]
@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Sungwon and Hwang, Dongjun and Kim, Shiwon and Choe, Junsuk and Nang, Jongho},
  title     = {Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7634--7643},
}
NSGuard: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation: Lizhi Xiong,

Jianguo Feng,

Ziqiang Li,

Jun Li,

Weiwei Jiang,

Zhangjie Fu; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Lizhi and Feng, Jianguo and Li, Ziqiang and Li, Jun and Jiang, Weiwei and Fu, Zhangjie},
  title     = {{NSGuard}: Null-Space Guided Robust Watermarking for Data Copyright Protection in Customized Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {675--684},
}
FineGrade: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment: Yicong Li,

Howard Leung; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yicong and Leung, Howard},
  title     = {{FineGrade}: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8258--8267},
}
TinySR: Shallow Diffusion Transformers for Real-World Image Super-Resolution: Linwei Dong,

Qingnan Fan,

Yuhang Yu,

Qi Zhang,

Jinwei Chen,

Yawei Luo,

Changqing Zou; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Linwei and Fan, Qingnan and Yu, Yuhang and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing},
  title     = {{TinySR}: Shallow Diffusion Transformers for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5075--5085},
}
4D E-SloMo: 4D Reconstruction for High Speed Scene using a Hybrid RGB-Event Multi-View System: Bo Xu,

Jun Dai,

Yutian Chen,

Linning Xu,

Mulin Yu,

Yujin Wang,

Shi Guo,

Xinyi Le,

Tianfan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Bo and Dai, Jun and Chen, Yutian and Xu, Linning and Yu, Mulin and Wang, Yujin and Guo, Shi and Le, Xinyi and Xue, Tianfan},
  title     = {{4D E-SloMo}: {4D} Reconstruction for High Speed Scene using a Hybrid {RGB}-Event Multi-View System},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {43--53}
}
ReaAct: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization: Yanzhao Yu,

Yi Ding,

Peijun Tang,

Haotian Yang,

Xianbiao Qi,

Jianan Wang,

Xueqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yanzhao and Ding, Yi and Tang, Peijun and Yang, Haotian and Qi, Xianbiao and Wang, Jianan and Wang, Xueqian},
  title     = {{ReaAct}: Bridging Robotic Reasoning and Action Generation Toward Real-World Spatial Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1441--1451}
}
Generalizable Human Gaussian Splatting via Multi-view Semantic Consistency: Jingi Kim,

Wonjun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jingi and Kim, Wonjun},
  title     = {Generalizable Human {Gaussian} Splatting via Multi-view Semantic Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {139--148}
}
FedErase: Personalized Federated Unlearning for Text-to-Image Diffusion Models: Tianyu Geng,

Wenfei Liang,

Sijie Wang,

Rui She,

Wee Peng Tay; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Tianyu and Liang, Wenfei and Wang, Sijie and She, Rui and Tay, Wee Peng},
  title     = {{FedErase}: Personalized Federated Unlearning for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4035--4044}
}
WideEye: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation: Z. Jonny Kong,

Sibendu Paul,

Y. Charlie Hu; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Z. Jonny and Paul, Sibendu and Hu, Y. Charlie},
  title     = {{WideEye}: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8618--8628}
}
Organizing Unstructured Image Collections using Natural Language: Mingxuan Liu,

Zhun Zhong,

Jun Li,

Gianni Franchi,

Subhankar Roy,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingxuan and Zhong, Zhun and Li, Jun and Franchi, Gianni and Roy, Subhankar and Ricci, Elisa},
  title     = {Organizing Unstructured Image Collections using Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8907--8918}
}
Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity 3D Reconstruction in Autonomous Driving: Mingbo Dai,

Han Yan,

Bolun Zhang,

Wu Ran,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Mingbo and Yan, Han and Zhang, Bolun and Ran, Wu and Ma, Chao},
  title     = {Dynamic Scene Decomposition Beyond Moving Objects for High-Fidelity {3D} Reconstruction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {181--190}
}
MolRecBench-Wild: A Real-World Benchmark for Optical Chemical Structure Recognition: Haote Yang,

Hui Wang,

Chen Zhu,

Jingchao Wang,

Linye Li,

Hongbin Lai,

Huijie Ao,

Yongxuan Lv,

Jiang Wu,

Jiaxing Sun,

Lua Chen,

Yuanyuan Cao,

Ruijie Zhang,

Shengxin Lu,

Lijun Wu,

Bin Wang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haote and Wang, Hui and Zhu, Chen and Wang, Jingchao and Li, Linye and Lai, Hongbin and Ao, Huijie and Lv, Yongxuan and Wu, Jiang and Sun, Jiaxing and Chen, Lua and Cao, Yuanyuan and Zhang, Ruijie and Lu, Shengxin and Wu, Lijun and Wang, Bin and He, Conghui},
  title     = {{MolRecBench-Wild}: A Real-World Benchmark for Optical Chemical Structure Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1924--1933}
}
SCAIL: Towards Studio-Grade Character Animation via In-Context Learning of 3D-Consistent Pose Representations: Wenhao Yan,

Sheng Ye,

Zhuoyi Yang,

Jiayan Teng,

ZhenHui Dong,

Kairui Wen,

Xiaotao Gu,

Yong-Jin Liu,

Jie Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Wenhao and Ye, Sheng and Yang, Zhuoyi and Teng, Jiayan and Dong, ZhenHui and Wen, Kairui and Gu, Xiaotao and Liu, Yong-Jin and Tang, Jie},
  title     = {{SCAIL}: Towards Studio-Grade Character Animation via In-Context Learning of {3D}-Consistent Pose Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4450--4460}
}
Latent Domain Modeling Improves Robustness to Geographic Shifts: Ruth Crasto,

Esther Rolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Crasto_2026_CVPR,
  author    = {Crasto, Ruth and Rolf, Esther},
  title     = {Latent Domain Modeling Improves Robustness to Geographic Shifts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2419--2428}
}
Zero4D: Training-Free 4D Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models: Jangho Park,

Taesung Kwon,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jangho and Kwon, Taesung and Ye, Jong Chul},
  title     = {{Zero4D}: Training-Free {4D} Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4045--4054}
}
Uncertainty-Guided Graph Formulation via MWIS for Token Pruning in LVLMs: Jouwon Song,

Sohyeon Kim,

Kyeongbo Kong; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Jouwon and Kim, Sohyeon and Kong, Kyeongbo},
  title     = {Uncertainty-Guided Graph Formulation via {MWIS} for Token Pruning in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9510--9519}
}
MADrive: Memory-Augmented Driving Scene Modeling: Polina Karpikova,

Daniil Selikhanovych,

Kirill Struminsky,

Ruslan Musaev,

Maria Golitsyna,

Dmitry Baranchuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karpikova_2026_CVPR,
  author    = {Karpikova, Polina and Selikhanovych, Daniil and Struminsky, Kirill and Musaev, Ruslan and Golitsyna, Maria and Baranchuk, Dmitry},
  title     = {{MADrive}: Memory-Augmented Driving Scene Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {54--65}
}
BiomedHELIX: HiErarchical-Local Interaction eXploration for Biomedical Vision-Language Models: Ziheng Zhu,

Yuncheng Guo,

Jie Xu,

Xiaodong Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ziheng and Guo, Yuncheng and Xu, Jie and Gu, Xiaodong},
  title     = {{BiomedHELIX}: {HiErarchical-Local} Interaction {eXploration} for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7060--7069}
}
INTERLACE: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models: Parsa Madinei,

Ryan Solgi,

Ziqi Wen,

Jonathan Skaza,

Miguel Eckstein,

Ramtin Pedarsani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Madinei_2026_CVPR,
  author    = {Madinei, Parsa and Solgi, Ryan and Wen, Ziqi and Skaza, Jonathan and Eckstein, Miguel and Pedarsani, Ramtin},
  title     = {{INTERLACE}: Interleaved Layer Pruning and Efficient Adaptation in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2947--2956}
}
Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning: Kyle Domico,

Jean-Charles Noirot Ferrand,

Ryan Sheatsley,

Eric Pauley,

Josiah Hanna,

Patrick McDaniel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Domico_2026_CVPR,
  author    = {Domico, Kyle and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Pauley, Eric and Hanna, Josiah and McDaniel, Patrick},
  title     = {Adversarial Agents: Black-Box Evasion Attacks with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {647--655}
}
TAUE: Training-free Noise Transplant and Cultivation Diffusion Model: Daichi Nagai,

Ryugo Morita,

Shunsuke Kitada,

Hitoshi Iyatomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagai_2026_CVPR,
  author    = {Nagai, Daichi and Morita, Ryugo and Kitada, Shunsuke and Iyatomi, Hitoshi},
  title     = {{TAUE}: Training-free Noise Transplant and Cultivation Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3749--3758}
}
Instruction-Focus-Prompt: Semantics-Driven Structural Prompts for Universal SAM Segmentation: Shuqi Xia,

Guangze Shi,

Jiarui Cao,

Aoyuan Shi,

Meilin Liu,

Xiaoyi Zhang,

Yujie Wang,

Xueyu Liu,

Cai Zhao,

Ziyuan He,

Yongfei Wu,

Mingqiang Wei; [pdf]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Shuqi and Shi, Guangze and Cao, Jiarui and Shi, Aoyuan and Liu, Meilin and Zhang, Xiaoyi and Wang, Yujie and Liu, Xueyu and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang},
  title     = {Instruction-Focus-Prompt: Semantics-Driven Structural Prompts for Universal {SAM} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7514--7519}
}
VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning: Zengjie Hu,

Jiantao Qiu,

Tianyi Bai,

Haojin Yang,

Binhang Yuan,

Qi Jing,

Conghui He,

Wentao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zengjie and Qiu, Jiantao and Bai, Tianyi and Yang, Haojin and Yuan, Binhang and Jing, Qi and He, Conghui and Zhang, Wentao},
  title     = {{VADE}: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9846--9855}
}
3D Gaussian Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction: Beiyuan Zhang,

Hesong Li,

Ruiwen Shao,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Beiyuan and Li, Hesong and Shao, Ruiwen and Fu, Ying},
  title     = {{3D} {Gaussian} Splatting for Annular Dark Field Scanning Transmission Electron Microscopy Tomography Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {306--315}
}
Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling: Minyoung Kim,

Paul Hongsuck Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minyoung and Seo, Paul Hongsuck},
  title     = {Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8877--8886}
}
Fast Autoregressive Video Generation with Diagonal Decoding: Yang Ye,

Junliang Guo,

Haoyu Wu,

Tianyu He,

Tim Pearce,

Tabish Rashid,

Katja Hofmann,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yang and Guo, Junliang and Wu, Haoyu and He, Tianyu and Pearce, Tim and Rashid, Tabish and Hofmann, Katja and Bian, Jiang},
  title     = {Fast Autoregressive Video Generation with Diagonal Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4419--4428}
}
Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework: Yilin Wang,

Dawei Luo,

Shuai Chen,

Feng Xu,

Jiachi Wang,

Zunlei Feng,

Yijun Bei; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yilin and Luo, Dawei and Chen, Shuai and Xu, Feng and Wang, Jiachi and Feng, Zunlei and Bei, Yijun},
  title     = {Cross-Dimensional Forgery Pattern Extraction for Generalizable Forgery Localization Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2314--2323}
}
HAM: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models: Yeqi He,

Liang Li,

Zhiwen Yang,

Xichun Sheng,

Zhidong Zhao,

Chenggang Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yeqi and Li, Liang and Yang, Zhiwen and Sheng, Xichun and Zhao, Zhidong and Yan, Chenggang},
  title     = {{HAM}: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3914--3923}
}
PGDM: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images: Jinze Zhao,

Keyi Han,

Qiushi Huang,

Jie Tian,

Zhenhua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jinze and Han, Keyi and Huang, Qiushi and Tian, Jie and Hu, Zhenhua},
  title     = {{PGDM}: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5599--5608}
}
FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation: Minh Khoa Le,

Kien Do,

Duc Thanh Nguyen,

Truyen Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Minh Khoa and Do, Kien and Nguyen, Duc Thanh and Tran, Truyen},
  title     = {{FrameDiT}: Diffusion Transformer with Matrix Attention for Efficient Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4234--4244}
}
FREESTYLE: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation: Minseok Oh,

Jihun Park,

Jongmin Gim,

Minwoo Choi,

Kyoungmin Lee,

Ferdinando Fioretto,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Minseok and Park, Jihun and Gim, Jongmin and Choi, Minwoo and Lee, Kyoungmin and Fioretto, Ferdinando and Im, Sunghoon},
  title     = {{FREESTYLE}: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3779--3788}
}
Enriching Knowledge Distillation with Cross-Modal Teacher Fusion: Amir M. Mansourian,

Amir Mohammad Babaei,

Shohreh Kasaei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mansourian_2026_CVPR,
  author    = {Mansourian, Amir M. and Babaei, Amir Mohammad and Kasaei, Shohreh},
  title     = {Enriching Knowledge Distillation with Cross-Modal Teacher Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2893--2903}
}
SAT: Selective Aggregation Transformer for Image Super-Resolution: Dinh Phu Tran,

Thao Do,

Saad Wazir,

Seongah Kim,

Seon Kwon Kim,

Daeyoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Dinh Phu and Do, Thao and Wazir, Saad and Kim, Seongah and Kim, Seon Kwon and Kim, Daeyoung},
  title     = {{SAT}: Selective Aggregation Transformer for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4982--4992}
}
Video2LoRA: Unified Semantic-Controlled Video Generation via Per-Reference-Video LoRA: Zexi Wu,

Baolu Li,

Jing Dai,

Yiming Zhang,

Yue Ma,

Qinghe Wang,

Xu Jia,

Hongming Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zexi and Li, Baolu and Dai, Jing and Zhang, Yiming and Ma, Yue and Wang, Qinghe and Jia, Xu and Xu, Hongming},
  title     = {{Video2LoRA}: Unified Semantic-Controlled Video Generation via Per-Reference-Video {LoRA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4202--4212}
}
Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval: Xintao Zong,

Wenxuan Liu,

Jianhao Ding,

Zhaofei Yu,

Xian Zhong,

Tiejun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2026_CVPR,
  author    = {Zong, Xintao and Liu, Wenxuan and Ding, Jianhao and Yu, Zhaofei and Zhong, Xian and Huang, Tiejun},
  title     = {Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5137--5146}
}
GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction: Wanyu Zhang,

Yanzhao Shi,

Chengxin Zheng,

Hua Wang,

Jianing Wang,

Yue Zhang,

Xiaobing Yu,

Xiaodan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wanyu and Shi, Yanzhao and Zheng, Chengxin and Wang, Hua and Wang, Jianing and Zhang, Yue and Yu, Xiaobing and Zhang, Xiaodan},
  title     = {{GenSRL}: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9622--9631}
}
Riemannian Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection: Yan Li,

Zhouchao Fu,

Wenbin Lu,

Junjie Zheng,

Junnan Xu,

Junjie Liao,

Jianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Fu, Zhouchao and Lu, Wenbin and Zheng, Junjie and Xu, Junnan and Liao, Junjie and Zheng, Jianwei},
  title     = {{Riemannian} Score-Based Diffusion for Language-Conditioned Grasp and Affordance Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1546--1555}
}
AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts: Yuting Gao,

Lan Wang,

Hengyuan Zhao,

Linjiang Huang,

Si Liu,

Qingpei Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuting and Wang, Lan and Zhao, Hengyuan and Huang, Linjiang and Liu, Si and Guo, Qingpei},
  title     = {{AnyExperts}: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9205--9214}
}
SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching: Chengshan Yang,

Pengnian Zhang,

Jinjing Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Chengshan and Zhang, Pengnian and Zhao, Jinjing},
  title     = {{SA-Matching DETR}: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6695--6705}
}
MiVLA: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training: Zhenhan Yin,

Xuanhan Wang,

Jiahao Jiang,

Kaiyuan Deng,

Pengqi Chen,

Shuangle Li,

Chong Liu,

Xing Xu,

Jingkuan Song,

Lianli Gao,

Heng Tao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Zhenhan and Wang, Xuanhan and Jiang, Jiahao and Deng, Kaiyuan and Chen, Pengqi and Li, Shuangle and Liu, Chong and Xu, Xing and Song, Jingkuan and Gao, Lianli and Shen, Heng Tao},
  title     = {{MiVLA}: Towards Generalizable Vision-Language-Action Model with Human-Robot Mutual Imitation Pre-training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1535--1545}
}
PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction: Xueheng Li,

Tao Hu,

Ke Cao,

Runsheng Qi,

Huixin Zhang,

Rui Li,

Jie Zhang,

Chengjun Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xueheng and Hu, Tao and Cao, Ke and Qi, Runsheng and Zhang, Huixin and Li, Rui and Zhang, Jie and Xie, Chengjun},
  title     = {{PestVL-Net}: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8826--8835}
}
WHOLE: World-Grounded Hand-Object Lifted from Egocentric Videos: Yufei Ye,

Jiaman Li,

Ryan Rong,

C. Karen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yufei and Li, Jiaman and Rong, Ryan and Liu, C. Karen},
  title     = {{WHOLE}: World-Grounded Hand-Object Lifted from Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3481--3491}
}
3D-RE-GEN: 3D Reconstruction of Indoor Scenes with a Generative Framework: Tobias Sautter,

Jan-Niklas Dihlmann,

Hendrik P. A. Lensch; [pdf] [arXiv]
[bibtex]
@InProceedings{Sautter_2026_CVPR,
  author    = {Sautter, Tobias and Dihlmann, Jan-Niklas and Lensch, Hendrik P. A.},
  title     = {{3D-RE-GEN}: {3D} Reconstruction of Indoor Scenes with a Generative Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {528--537}
}
E-GRPO: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models: Shengjun Zhang,

Zhang Zhang,

Chensheng Dai,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shengjun and Zhang, Zhang and Dai, Chensheng and Duan, Yueqi},
  title     = {{E-GRPO}: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4429--4439}
}
Video Generation Models are Good Latent Reward Models: Xiaoyue Mi,

Wenqing Yu,

Jiesong Lian,

Shibo Jie,

Ruizhe Zhong,

Zijun Liu,

Guozhen Zhang,

Zixiang Zhou,

Zhiyong Xu,

Yuan Zhou,

Qinglin Lu,

Fan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2026_CVPR,
  author    = {Mi, Xiaoyue and Yu, Wenqing and Lian, Jiesong and Jie, Shibo and Zhong, Ruizhe and Liu, Zijun and Zhang, Guozhen and Zhou, Zixiang and Xu, Zhiyong and Zhou, Yuan and Lu, Qinglin and Tang, Fan},
  title     = {Video Generation Models are Good Latent Reward Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4719--4728}
}
GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes: Beibei Lin,

Xiao Cao,

Jingyuan Guo,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Beibei and Cao, Xiao and Guo, Jingyuan and Tan, Robby T.},
  title     = {{GlowGS}: Generative Semantic Feature Learning for {3D} {Gaussian} Splatting in Nighttime Glow Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {275--284}
}
RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models: Ravi Ranjan,

Utkarsh Grover,

Xiaomin Lin,

Agoritsa Polyzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranjan_2026_CVPR,
  author    = {Ranjan, Ravi and Grover, Utkarsh and Lin, Xiaomin and Polyzou, Agoritsa},
  title     = {{RAZOR}: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7998--8008}
}
Beyond Single Object: Learning 3D Relations with Large Language Models: Kohsuke Ide,

Ryousuke Yamada,

Yue Qiu,

Xianzheng Ma,

Yoshihiro Fukuhara,

Hirokatsu Kataoka,

Yutaka Satoh; [pdf] [supp]
[bibtex]
@InProceedings{Ide_2026_CVPR,
  author    = {Ide, Kohsuke and Yamada, Ryousuke and Qiu, Yue and Ma, Xianzheng and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Satoh, Yutaka},
  title     = {Beyond Single Object: Learning {3D} Relations with Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9684--9694}
}
Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models: Sijie Wang,

Yingying Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sijie and Zhu, Yingying},
  title     = {Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7615--7624}
}
From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation: Evren Çetinkaya,

Sangmin Lee,

Jung Uk Kim,

Hong Joo Lee,

Nassir Navab; [pdf] [supp]
[bibtex]
@InProceedings{Cetinkaya_2026_CVPR,
  author    = {{\c{C}}etinkaya, Evren and Lee, Sangmin and Kim, Jung Uk and Lee, Hong Joo and Navab, Nassir},
  title     = {From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5325--5335}
}
Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving: Zhexi Lian,

Haoran Wang,

Xuerun Yan,

Weimeng Lin,

Xianhong Zhang,

Yongyu Chen,

Jia Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Zhexi and Wang, Haoran and Yan, Xuerun and Lin, Weimeng and Zhang, Xianhong and Chen, Yongyu and Hu, Jia},
  title     = {Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {920--930}
}
From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation: Rafael Pablos Sarabia,

Joachim Nyborg,

Morten Birk,

Ira Assent; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarabia_2026_CVPR,
  author    = {Sarabia, Rafael Pablos and Nyborg, Joachim and Birk, Morten and Assent, Ira},
  title     = {From Drops to Grid: Noise-Aware Spatio-Temporal Neural Process for Rainfall Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2606--2617}
}
StereoSpace: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space: Tjark Behrens,

Anton Obukhov,

Bingxin Ke,

Fabio Tosi,

Matteo Poggi,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behrens_2026_CVPR,
  author    = {Behrens, Tjark and Obukhov, Anton and Ke, Bingxin and Tosi, Fabio and Poggi, Matteo and Schindler, Konrad},
  title     = {{StereoSpace}: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3975--3987}
}
Do Vision Models Perceive Illusory Motion in Static Images Like Humans?: Isabella E. Rosario,

Fan L. Cheng,

Zitang Sun,

Nikolaus Kriegeskorte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosario_2026_CVPR,
  author    = {Rosario, Isabella E. and Cheng, Fan L. and Sun, Zitang and Kriegeskorte, Nikolaus},
  title     = {Do Vision Models Perceive Illusory Motion in Static Images Like Humans?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5515--5524}
}
Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta MRI: Athena Taymourtash,

S. Mazdak Abulnaga,

Esra Abaci-Turk,

P. Ellen Grant,

Polina Golland; [pdf] [supp]
[bibtex]
@InProceedings{Taymourtash_2026_CVPR,
  author    = {Taymourtash, Athena and Abulnaga, S. Mazdak and Abaci-Turk, Esra and Grant, P. Ellen and Golland, Polina},
  title     = {Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta {MRI}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5285--5294}
}
Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use: Ding Yi,

Yanzhao Yu,

Xili Dai,

Xianbiao Qi,

Peiwen Sun,

Xueqian Wang,

Xiangyu Yue,

Jianan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ding and Yu, Yanzhao and Dai, Xili and Qi, Xianbiao and Sun, Peiwen and Wang, Xueqian and Yue, Xiangyu and Wang, Jianan},
  title     = {Evolve Vision-Language-Action Model into an Agent with On-the-fly Tool-use},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1346--1357}
}
Learning from Label Proportion with Dual-Proportion Constraints: Tianhao Ma,

Ximing Li,

Changchun Li,

Renchu Guan; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tianhao and Li, Ximing and Li, Changchun and Guan, Renchu},
  title     = {Learning from Label Proportion with Dual-Proportion Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7583--7592}
}
Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis: Helu Zhi,

Jingjing Huang,

Wang Xu,

Yangbin Xu,

Yibin Huang,

Wanyue Zhang,

Baoyang Jiang,

Shirui Deng,

Liang Zhu,

FangFang Li,

Tiejun Zhao,

Yankai Lin,

Yuan Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhi_2026_CVPR,
    author    = {Zhi, Helu and Huang, Jingjing and Xu, Wang and Xu, Yangbin and Huang, Yibin and Zhang, Wanyue and Jiang, Baoyang and Deng, Shirui and Zhu, Liang and Li, FangFang and Zhao, Tiejun and Lin, Yankai and Yao, Yuan},
    title     = {Scaling Spatial Reasoning in {MLLMs} through Programmatic Data Synthesis},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9215--9224}
}
EgoTL: Egocentric Think-Aloud Chains for Long-Horizon Tasks: Lulin Liu,

Dayou Li,

Yiqing Liang,

Sicong Jiang,

Hitesh Vijay,

Hezhen Hu,

Xuhai Xu,

Zirui Liu,

Srinivas Shakkottai,

Manling Li,

Zhiwen Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Lulin and Li, Dayou and Liang, Yiqing and Jiang, Sicong and Vijay, Hitesh and Hu, Hezhen and Xu, Xuhai and Liu, Zirui and Shakkottai, Srinivas and Li, Manling and Fan, Zhiwen},
    title     = {{EgoTL}: Egocentric Think-Aloud Chains for Long-Horizon Tasks},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2017--2027}
}
FUSION: Full-body Unified Motion Prior for Body and Hands Via Diffusion: Enes Duran,

Nikos Athanasiou,

Muhammed Kocabas,

Michael J. Black,

Omid Taheri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duran_2026_CVPR,
    author    = {Duran, Enes and Athanasiou, Nikos and Kocabas, Muhammed and Black, Michael J. and Taheri, Omid},
    title     = {{FUSION}: Full-body Unified Motion Prior for Body and Hands Via Diffusion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3438--3448}
}
Fast-HOI: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains: Xiaokang Pan,

Zhizhong Zhang,

Yangyuan Liu,

Zhuoran Chen,

Zhiwei Zhang,

Bin Ji,

Mingang Chen,

Yong Xie,

Jingyu Gong,

Xuhong Wang,

Xin Tan,

Yuan Xie; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Xiaokang and Zhang, Zhizhong and Liu, Yangyuan and Chen, Zhuoran and Zhang, Zhiwei and Ji, Bin and Chen, Mingang and Xie, Yong and Gong, Jingyu and Wang, Xuhong and Tan, Xin and Xie, Yuan},
    title     = {{Fast-HOI}: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3719--3728}
}
Variable-View Diffusion with Geometric Uncertainty Unlocks LiDAR Upsampling: Pengfei Yang,

Sifu Luo,

Feng Wu,

Fan Zhou,

Ting Zhong; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Pengfei and Luo, Sifu and Wu, Feng and Zhou, Fan and Zhong, Ting},
    title     = {Variable-View Diffusion with Geometric Uncertainty Unlocks {LiDAR} Upsampling},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1148--1158}
}
VESPA: Open-World Auto-Labeling for 3D Object Detection in Autonomous Driving: Levente Tempfli,

Esteban Rivera,

Markus Lienkamp; [pdf] [supp]
[bibtex]
@InProceedings{Tempfli_2026_CVPR,
    author    = {Tempfli, Levente and Rivera, Esteban and Lienkamp, Markus},
    title     = {{VESPA}: Open-World Auto-Labeling for {3D} Object Detection in Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {960--969}
}
PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning: Tao Huang,

Jiayang Meng,

Hong Chen,

Chen Hou,

Guolong Zheng,

Xu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Tao and Meng, Jiayang and Chen, Hong and Hou, Chen and Zheng, Guolong and Yang, Xu},
    title     = {{PLR-Gate}: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8029--8038}
}
Gen-n-Val: Agentic Image Data Generation and Validation: Jing-En Huang,

I-Sheng Fang,

Tzuhsuan Huang,

Yu-Lun Liu,

Chih-Yu Wang,

Jun-Cheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Jing-En and Fang, I-Sheng and Huang, Tzuhsuan and Liu, Yu-Lun and Wang, Chih-Yu and Chen, Jun-Cheng},
    title     = {{Gen-n-Val}: Agentic Image Data Generation and Validation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8786--8795}
}
Event-Based Optical Flow Leveraging Precise Event Timing: Hugh Greatorex,

Elisabetta Chicca; [pdf] [supp]
[bibtex]
@InProceedings{Greatorex_2026_CVPR,
    author    = {Greatorex, Hugh and Chicca, Elisabetta},
    title     = {Event-Based Optical Flow Leveraging Precise Event Timing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3178--3188}
}
PBSBench: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation: Yuanlong Wang,

Weichi Chen,

Adrian Rajab,

Wenfang Liu,

Yulan Jin,

Andrew Srisuwananukorn,

Ping Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yuanlong and Chen, Weichi and Rajab, Adrian and Liu, Wenfang and Jin, Yulan and Srisuwananukorn, Andrew and Zhang, Ping},
    title     = {{PBSBench}: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5569--5578}
}
QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos: Zijun Xu,

Zhengqian Wu,

Chunjie Zhang,

Zhongyuan Wang,

Chunxia Xiao,

Chao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Zijun and Wu, Zhengqian and Zhang, Chunjie and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao},
    title     = {{QENN}: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8247--8257}
}
Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering: Noah Frahm,

Prakrut Patel,

Yue Zhang,

Shoubin Yu,

Mohit Bansal,

Roni Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Frahm_2026_CVPR,
    author    = {Frahm, Noah and Patel, Prakrut and Zhang, Yue and Yu, Shoubin and Bansal, Mohit and Sengupta, Roni},
    title     = {Prune-Then-Plan: Step-Level Calibration for Stable Frontier Exploration in Embodied Question Answering},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3114--3123}
}
AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting: Yuyuan Liu,

Yuanhong Chen,

Chong Wang,

Junlin Han,

Junde Wu,

Can Peng,

Jingkun Chen,

Yu Tian,

Gustavo Carneiro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yuyuan and Chen, Yuanhong and Wang, Chong and Han, Junlin and Wu, Junde and Peng, Can and Chen, Jingkun and Tian, Yu and Carneiro, Gustavo},
    title     = {{AuralSAM2}: Enabling {SAM2} Hear Through Pyramid Audio-Visual Feature Prompting},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7315--7325}
}
Conformal Cross-Modal Active Learning: Huy Hoang Nguyen,

Cédric Jung,

Shirin Salehi,

Tobias Glück,

Anke Schmeink,

Andreas Kugi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Huy Hoang and Jung, C{\'e}dric and Salehi, Shirin and Gl{\"u}ck, Tobias and Schmeink, Anke and Kugi, Andreas},
    title     = {Conformal Cross-Modal Active Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5147--5157}
}
MPM: Mutual Pair Merging for Efficient Vision Transformers: Simon Ravé,

Pejman Rasti,

David Rousseau; [pdf] [supp]
[bibtex]
@InProceedings{Rave_2026_CVPR,
    author    = {Rav{\'e}, Simon and Rasti, Pejman and Rousseau, David},
    title     = {{MPM}: Mutual Pair Merging for Efficient Vision Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2998--3008}
}
HazeMatching: Dehazing Light Microscopy Images with Guided Conditional Flow Matching: Anirban Ray,

Ashesh Ashesh,

Florian Jug; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ray_2026_CVPR,
    author    = {Ray, Anirban and Ashesh, Ashesh and Jug, Florian},
    title     = {{HazeMatching}: Dehazing Light Microscopy Images with Guided Conditional Flow Matching},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5652--5661}
}
Gaussian Shannon: High-Precision Diffusion Model Watermarking Based on Communication: Yi Zhang,

Hongbo Huang,

Liang-Jie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yi and Huang, Hongbo and Zhang, Liang-Jie},
    title     = {{Gaussian} {Shannon}: High-Precision Diffusion Model Watermarking Based on Communication},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3924--3932}
}
Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering: Zhengzhong Zhu,

Pei Zhou,

Lanxi Bai,

Jia Nie,

Li Cheng,

Shiquan Min,

Jiangping Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Nie, Jia and Cheng, Li and Min, Shiquan and Zhu, Jiangping},
    title     = {Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5818--5827}
}
Diffusion^2: Turning 3D Environments into Radio Frequency Heatmaps: Kyoungjun Park,

Yifan Yang,

Changhan Ge,

Lili Qiu,

Shiqi Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Kyoungjun and Yang, Yifan and Ge, Changhan and Qiu, Lili and Jiang, Shiqi},
    title     = {Diffusion{\textasciicircum}2: Turning {3D} Environments into Radio Frequency Heatmaps},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6414--6423}
}
Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-Earth Gravity and Don't Know Galileo's Principle...for now: Varun Varma Thozhiyoor,

Shivam Tripathi,

Venkatesh Babu Radhakrishnan,

Anand Bhattad; [pdf] [supp]
[bibtex]
@InProceedings{Thozhiyoor_2026_CVPR,
    author    = {Thozhiyoor, Varun Varma and Tripathi, Shivam and Radhakrishnan, Venkatesh Babu and Bhattad, Anand},
    title     = {Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-{Earth} Gravity and Don't Know {Galileo's} Principle...for now},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3830--3839}
}
Do MLLMs Exhibit Human-like Perceptual Behaviors? HVSBench: A Benchmark for MLLM Alignment with Human Perceptual Behavior: Jiaying Lin,

Shuquan Ye,

Dan Xu,

Wanli Ouyang,

Rynson W. H. Lau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Jiaying and Ye, Shuquan and Xu, Dan and Ouyang, Wanli and Lau, Rynson W. H.},
    title     = {Do {MLLMs} Exhibit Human-like Perceptual Behaviors? {HVSBench}: A Benchmark for {MLLM} Alignment with Human Perceptual Behavior},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1818--1827}
}
One Model for All: Unified Try-On and Try-Off in Any Pose via LLM-Inspired Bidirectional Tweedie Diffusion: Jinxi Liu,

Zijian He,

Guangrun Wang,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jinxi and He, Zijian and Wang, Guangrun and Li, Guanbin and Lin, Liang},
    title     = {One Model for All: Unified Try-On and Try-Off in Any Pose via {LLM}-Inspired Bidirectional {Tweedie} Diffusion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4310--4320}
}
Surgical Procedural Planning as 3D World Modelling: Towards Automated Pulmonary Resection: Zhen Zhang,

Zhaorong Dong,

Xiao Yang,

Liqin Huang,

Qiang Wu,

Taidui Zeng,

Hanyu Zheng,

Mingjing Yang,

Shaohua Zheng,

Wangbin Ding,

Lin Pan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Zhen and Dong, Zhaorong and Yang, Xiao and Huang, Liqin and Wu, Qiang and Zeng, Taidui and Zheng, Hanyu and Yang, Mingjing and Zheng, Shaohua and Ding, Wangbin and Pan, Lin},
    title     = {Surgical Procedural Planning as {3D} World Modelling: Towards Automated Pulmonary Resection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5315--5324}
}
Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal: Eun-Ju Park,

Youjin Shin,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Eun-Ju and Shin, Youjin and Woo, Simon S.},
    title     = {Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7978--7987}
}
AlignFL: Adaptive Learning and Intelligent Generation of Networks for Federated Learning: Qilin Xiang,

Qilin Fan,

Xinrui Li,

Tianfu Wang,

Shuting Qiu,

Yue Niu; [pdf]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
    author    = {Xiang, Qilin and Fan, Qilin and Li, Xinrui and Wang, Tianfu and Qiu, Shuting and Niu, Yue},
    title     = {{AlignFL}: Adaptive Learning and Intelligent Generation of Networks for Federated Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3019--3028}
}
A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability: Giacomo Astolfi,

Matteo Bianchi,

Riccardo Campi,

Antonio De Santis,

Marco Brambilla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Astolfi_2026_CVPR,
    author    = {Astolfi, Giacomo and Bianchi, Matteo and Campi, Riccardo and De Santis, Antonio and Brambilla, Marco},
    title     = {A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3303--3311}
}
Visual2Echo Compositional Contrastive Learning (V2E-CCL): Binaural Knowledge Distilled Network for Depth Prediction: Nazrul Ismail,

Owais Ahmed Malik,

Ong Wee Hong; [pdf]
[bibtex]
@InProceedings{Ismail_2026_CVPR,
    author    = {Ismail, Nazrul and Malik, Owais Ahmed and Hong, Ong Wee},
    title     = {{Visual2Echo} Compositional Contrastive Learning ({V2E-CCL}): Binaural Knowledge Distilled Network for Depth Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6019--6028}
}
A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition: Yanzhong Wang,

Daming Shi; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yanzhong and Shi, Daming},
    title     = {A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6528--6537}
}
InstructTable: Improving Table Structure Recognition Through Instruction: Boming Chen,

Zining Wang,

Zhentao Guo,

Jianqiang Liu,

Chen Duan,

Yu Gu,

Kai zhou,

Pengfei Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Boming and Wang, Zining and Guo, Zhentao and Liu, Jianqiang and Duan, Chen and Gu, Yu and Zhou, Kai and Yan, Pengfei},
    title     = {{InstructTable}: Improving Table Structure Recognition Through Instruction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2742--2752}
}
Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation: Wenjie Zhao,

Jia Li,

Xin Dong,

Yapeng Tian,

Yu Xiang,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Wenjie and Li, Jia and Dong, Xin and Tian, Yapeng and Xiang, Yu and Guo, Yunhui},
    title     = {Mitigating the {ID-OOD} Tradeoff in Open-Set Test-Time Adaptation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6789--6798}
}
Towards Robust Content Watermarking Against Removal and Forgery Attacks: Yifan Zhu,

Yihan Wang,

Xiao-Shan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yifan and Wang, Yihan and Gao, Xiao-Shan},
    title     = {Towards Robust Content Watermarking Against Removal and Forgery Attacks},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8059--8069}
}
No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching: Tingyan Wen,

Haoyu Li,

Yihuang Chen,

Xing Zhou,

Lifei Zhu,

XueQian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
    author    = {Wen, Tingyan and Li, Haoyu and Chen, Yihuang and Zhou, Xing and Zhu, Lifei and Wang, XueQian},
    title     = {No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4108--4117}
}
ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models: Nastaran Darabi,

Amit Ranjan Trivedi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Darabi_2026_CVPR,
    author    = {Darabi, Nastaran and Trivedi, Amit Ranjan},
    title     = {{ProGAL-VLA}: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9013--9022}
}
Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation: Yadang Chen,

Qi Liu,

Guoqing Zhang,

Le Sun,

Yuhui Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yadang and Liu, Qi and Zhang, Guoqing and Sun, Le and Zheng, Yuhui},
    title     = {Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7841--7851}
}
LoViC: Efficient Long Video Generation with Context Compression: Jiaxiu Jiang,

Wenbo Li,

Jingjing Ren,

Yuping Qiu,

Renjing Pei,

Fenglong Song,

Yong Guo,

Xiaogang Xu,

Han Wu,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Jiaxiu and Li, Wenbo and Ren, Jingjing and Qiu, Yuping and Pei, Renjing and Song, Fenglong and Guo, Yong and Xu, Xiaogang and Wu, Han and Zuo, Wangmeng},
    title     = {{LoViC}: Efficient Long Video Generation with Context Compression},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4022--4034}
}
TalkVid: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis: Shunian Chen,

Hejin Huang,

Yexin Liu,

Zihan Ye,

Pengcheng Chen,

Chenghao Zhu,

Michael Guan,

Rongsheng Wang,

Junying Chen,

Jianye Hou,

Bo Li,

Guanbin Li,

Ser-Nam Lim,

Harry Yang,

Benyou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Shunian and Huang, Hejin and Liu, Yexin and Ye, Zihan and Chen, Pengcheng and Zhu, Chenghao and Guan, Michael and Wang, Rongsheng and Chen, Junying and Hou, Jianye and Li, Bo and Li, Guanbin and Lim, Ser-Nam and Yang, Harry and Wang, Benyou},
    title     = {{TalkVid}: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3492--3500}
}
LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation: Haichao Zhang,

Yao Lu,

Lichen Wang,

Yunzhe Li,

Daiwei Chen,

Yunpeng Xu,

Yun Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Haichao and Lu, Yao and Wang, Lichen and Li, Yunzhe and Chen, Daiwei and Xu, Yunpeng and Fu, Yun},
    title     = {{LinkedOut}: Linking World Knowledge Representation Out of Video {LLM} for Next-Generation Video Recommendation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7111--7121}
}
AEGIS: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models: Jintao Lin,

Bowen Dong,

Weikang Shi,

Chenyang Lei,

Suiyun Zhang,

Rui Liu,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Jintao and Dong, Bowen and Shi, Weikang and Lei, Chenyang and Zhang, Suiyun and Liu, Rui and Liu, Xihui},
    title     = {{AEGIS}: Exploring the Limit of World Knowledge Capabilities for Unified Multimodal Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1797--1807}
}
LiDAR-to-4D Radar Synthesis for Building Large-Scale Tensor Datasets: Woo-Jin Jung,

Dong-Hee Paek,

Seung-Hyun Kong; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
    author    = {Jung, Woo-Jin and Paek, Dong-Hee and Kong, Seung-Hyun},
    title     = {{LiDAR}-to-{4D} Radar Synthesis for Building Large-Scale Tensor Datasets},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {889--899}
}
VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning: Chenglin Li,

Qianglong Chen,

Feng Han,

Yikun Wang,

Xingxi Yin,

Yan Gong,

Ruilin Li,

Yin Zhang,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chenglin and Chen, Qianglong and Han, Feng and Wang, Yikun and Yin, Xingxi and Gong, Yan and Li, Ruilin and Zhang, Yin and Wang, Jiaqi},
    title     = {{VideoThinker}: Building Agentic {VideoLLMs} with {LLM}-Guided Tool Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8226--8236}
}
EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration: Runze Li,

Yuwen Zhai,

Bo Xu,

Liwu Xu,

Nian Shi,

Wei Zhang,

Ran Lin,

Liang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Runze and Zhai, Yuwen and Xu, Bo and Xu, Liwu and Shi, Nian and Zhang, Wei and Lin, Ran and Wang, Liang},
    title     = {{EchoTrail-GUI}: Building Actionable Memory for {GUI} Agents via Critic-Guided Self-Exploration},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9347--9356}
}
Loom: Diffusion-Transformer for Interleaved Generation: Mingcheng Ye,

Jiaming Liu,

Yiren Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Mingcheng and Liu, Jiaming and Song, Yiren},
    title     = {Loom: Diffusion-Transformer for Interleaved Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4582--4592}
}
Concept Erasure via Attention Redirection: Amit Schechter,

Rinon Gal,

Ofir Kedem,

Gal Chechik,

Daniel Cohen-Or; [pdf] [supp]
[bibtex]
@InProceedings{Schechter_2026_CVPR,
    author    = {Schechter, Amit and Gal, Rinon and Kedem, Ofir and Chechik, Gal and Cohen-Or, Daniel},
    title     = {Concept Erasure via Attention Redirection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4572--4581}
}
FIRE-CIR: Fine-grained Reasoning for Composed Fashion Image Retrieval: François Gardères,

Camille-Sovanneary Gauthier,

Jean Ponce,

Shizhe Chen; [pdf] [supp]
[bibtex]
@InProceedings{Garderes_2026_CVPR,
    author    = {Gard{\`e}res, Fran{\c{c}}ois and Gauthier, Camille-Sovanneary and Ponce, Jean and Chen, Shizhe},
    title     = {{FIRE-CIR}: Fine-grained Reasoning for Composed Fashion Image Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {5694--5703}
}
BridgeDiffusion: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance: Yufei Huo,

Ao Li,

Wenxun Dai,

Songli Wu,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
    author    = {Huo, Yufei and Li, Ao and Dai, Wenxun and Wu, Songli and Tang, Yansong},
    title     = {{BridgeDiffusion}: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {3449--3459}
}
DINO-VO: Learning Where to Focus for Enhanced State Estimation: Qi Chen,

Guanghao Li,

Sijia Hu,

Xin Gao,

Junpeng Ma,

Xiangyang Xue,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Qi and Li, Guanghao and Hu, Sijia and Gao, Xin and Ma, Junpeng and Xue, Xiangyang and Pu, Jian},
    title     = {{DINO-VO}: Learning Where to Focus for Enhanced State Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {1556--1566}
}
VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning: Minghong Cai,

Qiulin Wang,

Zongli Ye,

Wenze Liu,

Quande Liu,

Weicai Ye,

Xintao Wang,

Pengfei Wan,

Kun Gai,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Minghong and Wang, Qiulin and Ye, Zongli and Liu, Wenze and Liu, Quande and Ye, Weicai and Wang, Xintao and Wan, Pengfei and Gai, Kun and Yue, Xiangyu},
    title     = {{VideoCanvas}: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4475--4485}
}
V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning: Zixu Cheng,

Jian Hu,

Ziquan Liu,

Chenyang Si,

Wei Li,

Shaogang Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Zixu and Hu, Jian and Liu, Ziquan and Si, Chenyang and Li, Wei and Gong, Shaogang},
    title     = {{V-STaR}: Benchmarking {Video-LLMs} on Video Spatio-Temporal Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9155--9164}
}
GRVS: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis: Thomas Tanay,

Mohammed Brahimi,

Michal Nazarczuk,

Qingwen Zhang,

Sibi Catley-Chandar,

Arthur Moreau,

Zhensong Zhang,

Eduardo Pérez-Pellitero; [pdf] [arXiv]
[bibtex]
@InProceedings{Tanay_2026_CVPR,
    author    = {Tanay, Thomas and Brahimi, Mohammed and Nazarczuk, Michal and Zhang, Qingwen and Catley-Chandar, Sibi and Moreau, Arthur and Zhang, Zhensong and P{\'e}rez-Pellitero, Eduardo},
    title     = {{GRVS}: a Generalizable and Recurrent Approach to Monocular Dynamic View Synthesis},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {348--359}
}
Fashion130K: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition: Yu He,

Ting Zhu,

Yichun Liu,

Lichen Ma,

Xinyuan Shan,

Jingling Fu,

Yu Shi,

Junshi Huang,

Yan Li; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Yu and Zhu, Ting and Liu, Yichun and Ma, Lichen and Shan, Xinyuan and Fu, Jingling and Shi, Yu and Huang, Junshi and Li, Yan},
    title     = {{Fashion130K}: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4769--4779}
}
PoseGen: In-Context LoRA Finetuning for Pose-Controllable Long Human Video Generation: Jingxuan He,

Busheng Su,

Finn Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Jingxuan and Su, Busheng and Wong, Finn},
    title     = {{PoseGen}: In-Context {LoRA} Finetuning for Pose-Controllable Long Human Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {4780--4789}
}
Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from CLIP: Naman Deep Singh,

Francesco Croce,

Matthias Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
    author    = {Singh, Naman Deep and Croce, Francesco and Hein, Matthias},
    title     = {Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from {CLIP}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6164--6173}
}
The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning: Garima Arya Yadav,

Nilay Yilmaz,

Yezhou Yang; [pdf] [supp]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
    author    = {Yadav, Garima Arya and Yilmaz, Nilay and Yang, Yezhou},
    title     = {The Unwritten Benchmark: A New Challenge for Multimodal Machine Learning in Abstract Perceptual Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {2069--2078}
}
Vision-Language Models for Automated 3D PET/CT Report Generation: Wenpei Jiao,

Ke Yan,

Jiajin Zhang,

Dakai Jin,

Zhaoheng Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR, author = {Jiao, Wenpei and Yan, Ke and Zhang, Jiajin and Jin, Dakai and Xie, Zhaoheng}, title = {Vision-Language Models for Automated 3D PET/CT Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5295-5304} }
SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction: Kang Han,

Wei Xiang,

Lu Yu,

Mathew Wyatt,

Gaowen Liu,

Ramana Rao Kompella; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Kang and Xiang, Wei and Yu, Lu and Wyatt, Mathew and Liu, Gaowen and Kompella, Ramana Rao}, title = {SwiftNDC: Fast Neural Depth Correction for High-Fidelity 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {12-21} }
Temporally Consistent Long-Term Memory for 3D Single Object Tracking: Jaejoon Yoo,

SuBeen Lee,

Yerim Jeon,

Miso Lee,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoo_2026_CVPR, author = {Yoo, Jaejoon and Lee, SuBeen and Jeon, Yerim and Lee, Miso and Heo, Jae-Pil}, title = {Temporally Consistent Long-Term Memory for 3D Single Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8388-8397} }
SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration: Zhimin Shao,

Abhay Yadav,

Rama Chellappa,

Cheng Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR, author = {Shao, Zhimin and Yadav, Abhay and Chellappa, Rama and Peng, Cheng}, title = {SPIDER: Spatial Image CorresponDence Estimator for Robust Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {253-263} }
Animated-ART: Multi-Layer Transparent Video Generation: Ziqiang Li,

Yunnan Wang,

Dong Chen,

Yue Dong,

Ji Li,

Yuhui Yuan,

Xin Jin; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Ziqiang and Wang, Yunnan and Chen, Dong and Dong, Yue and Li, Ji and Yuan, Yuhui and Jin, Xin}, title = {Animated-ART: Multi-Layer Transparent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4150-4159} }
Why MLLMs Struggle to Determine Object Orientations: Anju Gopinath,

Nikhil Krishnaswamy,

Bruce Draper; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gopinath_2026_CVPR, author = {Gopinath, Anju and Krishnaswamy, Nikhil and Draper, Bruce}, title = {Why MLLMs Struggle to Determine Object Orientations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9836-9845} }
GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light & Camera: Jiaye Wu,

Saeed Hadadan,

Geng Lin,

Peihan Tu,

Matthias Zwicker,

David Jacobs,

Roni Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Jiaye and Hadadan, Saeed and Lin, Geng and Tu, Peihan and Zwicker, Matthias and Jacobs, David and Sengupta, Roni}, title = {GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light \& Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6445-6455} }
PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation: Aaron Appelle,

Jerome P. Lynch; [pdf] [supp]
[bibtex]
@InProceedings{Appelle_2026_CVPR, author = {Appelle, Aaron and Lynch, Jerome P.}, title = {PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4461-4474} }
Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data: Dumindu Tissera,

Omar Awadallah,

Muhammad Umair Danish,

Ayan Sadhu,

Katarina Grolinger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tissera_2026_CVPR, author = {Tissera, Dumindu and Awadallah, Omar and Danish, Muhammad Umair and Sadhu, Ayan and Grolinger, Katarina}, title = {Any-Class Presence Likelihood for Robust Multi-Label Classification with Abundant Negative Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2429-2439} }
Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation: Zhibin Wan,

Zhiqiang Gao,

Mingjie Sun,

Yupei Wu,

Guohong Fu,

Ran Yi; [pdf]
[bibtex]
@InProceedings{Wan_2026_CVPR, author = {Wan, Zhibin and Gao, Zhiqiang and Sun, Mingjie and Wu, Yupei and Fu, Guohong and Yi, Ran}, title = {Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4170-4179} }
ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks: Abdullah Tariq,

Bisma Saleem,

R Muhammad Atif Azad,

Martin Masek,

Syed Zulqarnain Gilani; [pdf] [supp]
[bibtex]
@InProceedings{Tariq_2026_CVPR, author = {Tariq, Abdullah and Saleem, Bisma and Azad, R Muhammad Atif and Masek, Martin and Gilani, Syed Zulqarnain}, title = {ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6548-6559} }
ColorMam: Color-Aware State Space Model for Image Color Style Transfer: Jian Li,

Jiaxin Peng,

Yuchen Li,

Siwang Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jian and Peng, Jiaxin and Li, Yuchen and Zhou, Siwang}, title = {ColorMam: Color-Aware State Space Model for Image Color Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4379-4388} }
Mull-Tokens: Modality-Agnostic Latent Thinking: Arijit Ray,

Ahmed Abdelkader,

Chengzhi Mao,

Bryan A. Plummer,

Kate Saenko,

Ranjay Krishna,

Leonidas Guibas,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ray_2026_CVPR, author = {Ray, Arijit and Abdelkader, Ahmed and Mao, Chengzhi and Plummer, Bryan A. and Saenko, Kate and Krishna, Ranjay and Guibas, Leonidas and Chu, Wen-Sheng}, title = {Mull-Tokens: Modality-Agnostic Latent Thinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9477-9488} }
RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models: Omar Alama,

Darshil Jariwala,

Avigyan Bhattacharya,

Seungchan Kim,

Wenshan Wang,

Sebastian Scherer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alama_2026_CVPR, author = {Alama, Omar and Jariwala, Darshil and Bhattacharya, Avigyan and Kim, Seungchan and Wang, Wenshan and Scherer, Sebastian}, title = {RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9294-9304} }
Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router: Yubo Huang,

Weiqiang Wang,

Sirui Zhao,

Tong Xu,

Lin Liu,

Enhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Yubo and Wang, Weiqiang and Zhao, Sirui and Xu, Tong and Liu, Lin and Chen, Enhong}, title = {Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4440-4449} }
Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D: Agniv Sharma,

Xianghui Xie,

Tom Fischer,

Eddy Ilg,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR, author = {Sharma, Agniv and Xie, Xianghui and Fischer, Tom and Ilg, Eddy and Pons-Moll, Gerard}, title = {Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3405-3416} }
ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph: Junhao Cai,

Deyu Zeng,

Junhao Pang,

Lini Li,

Xiaopin Zhong,

Zongze Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR, author = {Cai, Junhao and Zeng, Deyu and Pang, Junhao and Li, Lini and Zhong, Xiaopin and Wu, Zongze}, title = {ForgeDreamer: Industrial Text-to-3D Generation with Multi-Expert LoRA and Cross-View Hypergraph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {295-305} }
UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics: Joseph Raj Vishal,

Nagasiri Poluri,

Katha Naik,

Rutuja Patil,

Kashyap Hegde Kota,

Krishna Vinod,

Prithvi Jai Ramesh,

Mohammad Farhadi,

Yezhou Yang,

Bharatesh Chakravarthi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vishal_2026_CVPR, author = {Vishal, Joseph Raj and Poluri, Nagasiri and Naik, Katha and Patil, Rutuja and Kota, Kashyap Hegde and Vinod, Krishna and Ramesh, Prithvi Jai and Farhadi, Mohammad and Yang, Yezhou and Chakravarthi, Bharatesh}, title = {UDVideoQA: A Traffic Video Question Answering Dataset for Multi-Object Spatio-Temporal Reasoning in Urban Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1862-1871} }
Unbiased Dynamic Multimodal Fusion: Shicai Wei,

Kaijie Zhang,

Luyi Chen,

Tao He,

Guiduo Duan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Shicai and Zhang, Kaijie and Chen, Luyi and He, Tao and Duan, Guiduo}, title = {Unbiased Dynamic Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6239-6249} }
Future Optical Flow Prediction Improves Robot Control and Video Generation: Kanchana Ranasinghe,

Honglu Zhou,

Yu Fang,

Luyu Yang,

Le Xue,

Ran Xu,

Caiming Xiong,

Silvio Savarese,

Michael S Ryoo,

Juan Carlos Niebles; [pdf] [supp]
[bibtex]
@InProceedings{Ranasinghe_2026_CVPR, author = {Ranasinghe, Kanchana and Zhou, Honglu and Fang, Yu and Yang, Luyu and Xue, Le and Xu, Ran and Xiong, Caiming and Savarese, Silvio and Ryoo, Michael S and Niebles, Juan Carlos}, title = {Future Optical Flow Prediction Improves Robot Control and Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4528-4540} }
ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors: Kaede Shiohara,

Toshihiko Yamasaki,

Vladislav Golyanik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shiohara_2026_CVPR, author = {Shiohara, Kaede and Yamasaki, Toshihiko and Golyanik, Vladislav}, title = {ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3665-3676} }
SPHINX: A Synthetic Environment for Visual Perception and Reasoning: Md Tanvirul Alam,

Saksham Aggarwal,

Justin Yang Chae,

Nidhi Rastogi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alam_2026_CVPR, author = {Alam, Md Tanvirul and Aggarwal, Saksham and Chae, Justin Yang and Rastogi, Nidhi}, title = {SPHINX: A Synthetic Environment for Visual Perception and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9489-9499} }
MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments: Zhiyu Huang,

Zewei Zhou,

Tianhui Cai,

Yun Zhang,

Jiaqi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Zhiyu and Zhou, Zewei and Cai, Tianhui and Zhang, Yun and Ma, Jiaqi}, title = {MDG: Masked Denoising Generation for Multi-Agent Behavior Modeling in Traffic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {878-888} }
OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism: Jordan Shipard,

Arnold Wiliem,

Kien Nguyen Thanh,

Wei Xiang,

Clinton Fookes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shipard_2026_CVPR, author = {Shipard, Jordan and Wiliem, Arnold and Thanh, Kien Nguyen and Xiang, Wei and Fookes, Clinton}, title = {OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6768-6778} }
OminiControl2: Efficient Conditioning for Diffusion Transformers: Zhenxiong Tan,

Qiaochu Xue,

Xingyi Yang,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR, author = {Tan, Zhenxiong and Xue, Qiaochu and Yang, Xingyi and Liu, Songhua and Wang, Xinchao}, title = {OminiControl2: Efficient Conditioning for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4256-4265} }
Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models: Woojun Jung,

Jaehoon Go,

Mingyu Jeon,

Sunjae Yoon,

Junyeong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR, author = {Jung, Woojun and Go, Jaehoon and Jeon, Mingyu and Yoon, Sunjae and Kim, Junyeong}, title = {Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8962-8971} }
HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models: Zhinan Xie,

Peisong Wang,

Shuang Qiu,

Jian Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Zhinan and Wang, Peisong and Qiu, Shuang and Cheng, Jian}, title = {HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8952-8961} }
MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs: Zhilin Lin,

Zhihui Zhang,

Shiliang Sun,

Jing Zhao,

Hao Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Zhilin and Zhang, Zhihui and Sun, Shiliang and Zhao, Jing and Yang, Hao}, title = {MathAll: A Real-World Benchmark for Mathematical Reasoning and Cross-Modal Understanding Evaluation in Omni-MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {2089-2099} }
Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification: Inès Hyeonsu Kim,

Woojeong Jin,

Soowon Son,

Junyoung Seo,

Seokju Cho,

JeongYeol Baek,

Byeongwon Lee,

JoungBin Lee,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, In\`es Hyeonsu and Jin, Woojeong and Son, Soowon and Seo, Junyoung and Cho, Seokju and Baek, JeongYeol and Lee, Byeongwon and Lee, JoungBin and Kim, Seungryong}, title = {Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8640-8650} }
Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation: Jianhang Ji,

Zhiming Cheng,

Jianxiang Zhao,

Bingtao Ma,

Hao Chen,

Yuhan Gao,

Lian Zhang,

Zuobin Ying,

Shuai Wang; [pdf]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Jianhang and Cheng, Zhiming and Zhao, Jianxiang and Ma, Bingtao and Chen, Hao and Gao, Yuhan and Zhang, Lian and Ying, Zuobin and Wang, Shuai}, title = {Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7799-7809} }
Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space: Chengzhi Liu,

Yuzhe Yang,

Yue Fan,

Qingyue Wei,

Sheng Liu,

Xin Eric Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Chengzhi and Yang, Yuzhe and Fan, Yue and Wei, Qingyue and Liu, Sheng and Wang, Xin Eric}, title = {Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9225-9236} }
ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation: Bingkun Nian,

Fenghe Tang,

Zhiwei Ning,

Dongsheng Jiang,

Yin Li,

Jie Yang,

Rong Xiao,

Shaohua Kevin Zhou,

Wei Liu; [pdf]
[bibtex]
@InProceedings{Nian_2026_CVPR, author = {Nian, Bingkun and Tang, Fenghe and Ning, Zhiwei and Jiang, Dongsheng and Li, Yin and Yang, Jie and Xiao, Rong and Zhou, Shaohua Kevin and Liu, Wei}, title = {ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6371-6381} }
SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units: Ruibin Wang,

Zhenyu Lin,

Xinhai Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ruibin and Lin, Zhenyu and Zhao, Xinhai}, title = {SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8796-8805} }
Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM: Cabrel Wouladje,

Golden Tendekai Mumanikidzwa,

Md Apon Islam,

Huiying Xu,

Hongbo Li,

Wenzhe Tan,

Zhendong Chen,

Xinzhong Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wouladje_2026_CVPR, author = {Wouladje, Cabrel and Mumanikidzwa, Golden Tendekai and Islam, Md Apon and Xu, Huiying and Li, Hongbo and Tan, Wenzhe and Chen, Zhendong and Zhu, Xinzhong}, title = {Temporally-Smooth Global Bundle Adjustment for Real-Time Dense Visual SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1567-1576} }
Instant Colorization of Gaussian Splats: Daniel Lieber,

Alexander Mock,

Nils Wandel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lieber_2026_CVPR, author = {Lieber, Daniel and Mock, Alexander and Wandel, Nils}, title = {Instant Colorization of Gaussian Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {170-180} }
Low-Bitrate Video Compression through Semantic-Conditioned Diffusion: Lingdong Wang,

Guan-Ming Su,

Divya Kothandaraman,

Tsung-Wei Huang,

Mohammad Hajiesmaili,

Ramesh K. Sitaraman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Lingdong and Su, Guan-Ming and Kothandaraman, Divya and Huang, Tsung-Wei and Hajiesmaili, Mohammad and Sitaraman, Ramesh K.}, title = {Low-Bitrate Video Compression through Semantic-Conditioned Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4495-4505} }
Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning: Yibing Wei,

Sudeep Katakol,

Manuel Brack,

Jinhong Lin,

Haoyue Bai,

Yu-Teng Li,

Richard Zhang,

Eli Shechtman,

Hareesh Ravi,

Ajinkya Kale; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Yibing and Katakol, Sudeep and Brack, Manuel and Lin, Jinhong and Bai, Haoyue and Li, Yu-Teng and Zhang, Richard and Shechtman, Eli and Ravi, Hareesh and Kale, Ajinkya}, title = {Towards Text-Guided Attribute-Disentangled Multimodal Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {1883-1892} }
Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective: Chenwei Wu,

Zitao Shuai,

Liyue Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Chenwei and Shuai, Zitao and Shen, Liyue}, title = {Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5641-5651} }
Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation: Hongli Liu,

Yu Wang,

Shengjie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Hongli and Wang, Yu and Zhao, Shengjie}, title = {Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7419-7428} }
Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification: Shihao Shan,

Hongying Liu,

Fanhua Shang,

Qian Wang,

Yang Song; [pdf]
[bibtex]
@InProceedings{Shan_2026_CVPR, author = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wang, Qian and Song, Yang}, title = {Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6476-6485} }
A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models: Mehmet Demirel,

Christos Kyrkou; [pdf] [supp]
[bibtex]
@InProceedings{Demirel_2026_CVPR, author = {Demirel, Mehmet and Kyrkou, Christos}, title = {A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7541-7551} }
Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment: Huaiyuan Qin,

Muli Yang,

Gabriel James Goenawan,

Kai Wang,

Zheng Wang,

Peng Hu,

Xi Peng,

Hongyuan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR, author = {Qin, Huaiyuan and Yang, Muli and Goenawan, Gabriel James and Wang, Kai and Wang, Zheng and Hu, Peng and Peng, Xi and Zhu, Hongyuan}, title = {Beyond Loss Values: Robust Dynamic Pruning via Loss Trajectory Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3029-3039} }
Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps: Xiangjun Gao,

Zhensong Zhang,

Dave Zhenyu Chen,

Songcen Xu,

Long Quan,

Eduardo Pérez-Pellitero,

Youngkyoon Jang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Xiangjun and Zhang, Zhensong and Chen, Dave Zhenyu and Xu, Songcen and Quan, Long and P\'erez-Pellitero, Eduardo and Jang, Youngkyoon}, title = {Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7154-7164} }
On the Group Disparities Arising from Machine Unlearning: Zijie Pan,

Zuobin Ying,

Yajie Wang,

Liehuang Zhu,

Wanlei Zhou; [pdf]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Zijie and Ying, Zuobin and Wang, Yajie and Zhu, Liehuang and Zhou, Wanlei}, title = {On the Group Disparities Arising from Machine Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8133-8142} }
Language-Augmented Semantic Priors for B-Spline Surface Fitting: Yunzhong Lou,

Yusheng Luo,

Jiahao Li,

Yu Song,

Xiangdong Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Lou_2026_CVPR, author = {Lou, Yunzhong and Luo, Yusheng and Li, Jiahao and Song, Yu and Zhou, Xiangdong}, title = {Language-Augmented Semantic Priors for B-Spline Surface Fitting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9120-9130} }
FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning: Jingchen Ni,

Quan Zhang,

Dan Jiang,

Keyu Lv,

Ke Zhang,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR, author = {Ni, Jingchen and Zhang, Quan and Jiang, Dan and Lv, Keyu and Zhang, Ke and Yuan, Chun}, title = {FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7439-7449} }
Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams: Zhenghui Guo,

Yuanbin Man,

Junyuan Sheng,

Bowen Lin,

Ahmed Ahmed,

Bo Jiang,

Boyuan Zhang,

Miao Yin,

Sian Jin,

Omprakash Gnawali,

Chengming Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Zhenghui and Man, Yuanbin and Sheng, Junyuan and Lin, Bowen and Ahmed, Ahmed and Jiang, Bo and Zhang, Boyuan and Yin, Miao and Jin, Sian and Gnawali, Omprakash and Zhang, Chengming}, title = {Event-VStream: Event-Driven Real-Time Understanding for Long Video Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3060-3069} }
Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection: Alex Costanzino,

Pierluigi Zama Ramirez,

Giuseppe Lisanti,

Luigi Di Stefano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Costanzino_2026_CVPR, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8816-8825} }
LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images: Yilong Liu,

Wanhua Li,

Chen Zhu-Tian,

Hanspeter Pfister; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yilong and Li, Wanhua and Zhu-Tian, Chen and Pfister, Hanspeter}, title = {LangFlash: Feed-forward 3D Language Gaussian Splatting from Sparse Unposed Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {191-201} }
Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation: Xinya Song,

Bo Yang,

Ying Cao; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Xinya and Yang, Bo and Cao, Ying}, title = {Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4729-4738} }
COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning: Can Zhang,

Ruirui Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Can and Li, Ruirui}, title = {COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6154-6163} }
Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting: Xiao Zhang,

Guangshuang Tan,

Jie Hu,

Shichao Kan,

Bing Jiang,

Yixiong Liang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiao and Tan, Guangshuang and Hu, Jie and Kan, Shichao and Jiang, Bing and Liang, Yixiong}, title = {Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5190-5200} }
World Model Robustness via Surprise Recognition: Geigh Zollicoffer,

Tanush Chopra,

Mingkuan Yan,

Xiaoxu Ma,

Kenneth Eaton,

Mark Riedl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zollicoffer_2026_CVPR, author = {Zollicoffer, Geigh and Chopra, Tanush and Yan, Mingkuan and Ma, Xiaoxu and Eaton, Kenneth and Riedl, Mark}, title = {World Model Robustness via Surprise Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3146-3155} }
CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors: Bikram Boote,

Junho Kim,

Ozgur Kara,

Sangmin Lee,

James M Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Boote_2026_CVPR, author = {Boote, Bikram and Kim, Junho and Kara, Ozgur and Lee, Sangmin and Rehg, James M}, title = {CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3417-3427} }
U-SEG: Uncertainty in SEGmentation - A systematic multi-variable exploration: Michael Smith,

Frank P. Ferrie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Smith_2026_CVPR,
  author    = {Smith, Michael and Ferrie, Frank P.},
  title     = {{U-SEG}: Uncertainty in {SEGmentation} - A systematic multi-variable exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1850--1861}
}
Learning Spatial-Preserving Hierarchical Representations for Digital Pathology: Weiyi Wu,

Xingjian Diao,

Chunhui Zhang,

Chongyang Gao,

Xinwen Xu,

Siting Li,

Jiang Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Weiyi and Diao, Xingjian and Zhang, Chunhui and Gao, Chongyang and Xu, Xinwen and Li, Siting and Gui, Jiang},
  title     = {Learning Spatial-Preserving Hierarchical Representations for Digital Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5484--5494}
}
Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference: Xuecheng Li,

Weikuan Jia,

Yuanjie Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuecheng and Jia, Weikuan and Zheng, Yuanjie},
  title     = {Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6040--6050}
}
Safe-LLaVA: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety: Younggun Kim,

Sirnam Swetha,

Fazil Kagdi,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Younggun and Swetha, Sirnam and Kagdi, Fazil and Shah, Mubarak},
  title     = {{Safe-LLaVA}: A Privacy-Preserving Vision Language Dataset and Benchmark for Biometric Safety},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2100--2110}
}
RectifiedHR: Enable Efficient High-Resolution Synthesis via Energy Rectification: Zhen Yang,

Guibao Shen,

Minyang Li,

Liang Hou,

Mushui Liu,

Luozhou Wang,

Xin Tao,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhen and Shen, Guibao and Li, Minyang and Hou, Liang and Liu, Mushui and Wang, Luozhou and Tao, Xin and Chen, Ying-Cong},
  title     = {{RectifiedHR}: Enable Efficient High-Resolution Synthesis via Energy Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3809--3819}
}
PureSpace: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models: Jinkai Li,

Zhenliang Zhang,

Lifeng Fan,

Wei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinkai and Zhang, Zhenliang and Fan, Lifeng and Wang, Wei},
  title     = {{PureSpace}: A Benchmark for Abstract Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1976--1985}
}
MOSSTrack : Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking: Yisong Liu,

He Yao,

Junlong Cheng,

Yujie Lu,

Junqi Bai,

Min Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yisong and Yao, He and Cheng, Junlong and Lu, Yujie and Bai, Junqi and Zhu, Min},
  title     = {{MOSSTrack} : Modality-Specific Spatio-Temporal Context Learning for {RGB-T} Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8378--8387}
}
Video Reasoning Without Training: Deepak Sridhar,

Kartikeya Bhardwaj,

Jeya Pradha Jeyaraj,

Nuno Vasconcelos,

Ankita Nayak,

Harris Teague; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sridhar_2026_CVPR,
  author    = {Sridhar, Deepak and Bhardwaj, Kartikeya and Jeyaraj, Jeya Pradha and Vasconcelos, Nuno and Nayak, Ankita and Teague, Harris},
  title     = {Video Reasoning Without Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6250--6260}
}
LOOPE: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers: Md Abtahi Majeed Chowdhury,

Md Rifat Ur Rahman,

Akil Ahmad Taki; [pdf] [supp]
[bibtex]
@InProceedings{Chowdhury_2026_CVPR,
  author    = {Chowdhury, Md Abtahi Majeed and Rahman, Md Rifat Ur and Taki, Akil Ahmad},
  title     = {{LOOPE}: Learnable Optimal Patch Order for Positional Encoders in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {1640--1649}
}
Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression: Haihao Zhang,

Siwei Dong,

Jianing Li,

Rui Zhao,

Yunjian Zhang,

Geng Qin,

Lin Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haihao and Dong, Siwei and Li, Jianing and Zhao, Rui and Zhang, Yunjian and Qin, Geng and Zhu, Lin},
  title     = {Unleashing the Potential of Event-Based Stereo Via Coarse-to-Fine Bio-Inspired Regression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3220--3230}
}
CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection: Qiyu Chen,

Zhen Qu,

Wei Luo,

Haiming Yao,

Yunkang Cao,

Yuxin Jiang,

Yinan Duan,

Huiyuan Luo,

Chengkan Lv,

Zhengtao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Qiyu and Qu, Zhen and Luo, Wei and Yao, Haiming and Cao, Yunkang and Jiang, Yuxin and Duan, Yinan and Luo, Huiyuan and Lv, Chengkan and Zhang, Zhengtao},
  title     = {{CoPS}: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8554--8563}
}
Test-Time Distillation for Continual Model Adaptation: Xiao Chen,

Jiazhen Huang,

Zhiming Liu,

Qinting Jiang,

Fanding Huang,

Jingyan Jiang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiao and Huang, Jiazhen and Liu, Zhiming and Jiang, Qinting and Huang, Fanding and Jiang, Jingyan and Wang, Zhi},
  title     = {Test-Time Distillation for Continual Model Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7593--7604}
}
Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality: Yanming Xiu,

Zhengyuan Jiang,

Neil Zhenqiang Gong,

Maria Gorlatova; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiu_2026_CVPR,
  author    = {Xiu, Yanming and Jiang, Zhengyuan and Gong, Neil Zhenqiang and Gorlatova, Maria},
  title     = {Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9110--9119}
}
Disrupting Positional Encoding for Effective Open Set Recognition: Yu Wang,

Jiabo Xie,

Yucan Zhou,

Junxian Mu,

Qinghua Hu,

Pengfei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yu and Xie, Jiabo and Zhou, Yucan and Mu, Junxian and Hu, Qinghua and Zhu, Pengfei},
  title     = {Disrupting Positional Encoding for Effective Open Set Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6633--6642}
}
Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback: Jianglin Lu,

Yuanwei Wu,

Ziyi Zhao,

Hongcheng Wang,

Felix Jimenez,

Abrar Majeedi,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jianglin and Wu, Yuanwei and Zhao, Ziyi and Wang, Hongcheng and Jimenez, Felix and Majeedi, Abrar and Fu, Yun},
  title     = {{Restore-R1}: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal {LLM} Perceptual Feedback},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8629--8639}
}
Towards Source-Aware Object Swapping with Initial Noise Perturbation: Jiahui Zhan,

Xianbing Sun,

Xiangnan Zhu,

Yikun Ji,

Ruitong Liu,

Liqing Zhang,

Jianfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jiahui and Sun, Xianbing and Zhu, Xiangnan and Ji, Yikun and Liu, Ruitong and Zhang, Liqing and Zhang, Jianfu},
  title     = {Towards Source-Aware Object Swapping with Initial Noise Perturbation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4400--4409}
}
SHIELD: Secure Hypernetworks for Incremental Expansion Learning Defense: Patryk Krukowski,

Lukasz Gorczyca,

Piotr Helm,

Kamil Ksiazek,

Przemyslaw Spurek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krukowski_2026_CVPR,
  author    = {Krukowski, Patryk and Gorczyca, Lukasz and Helm, Piotr and Ksiazek, Kamil and Spurek, Przemyslaw},
  title     = {{SHIELD}: Secure Hypernetworks for Incremental Expansion Learning Defense},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2377--2386}
}
MegAD: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection: Xinying Li,

Junfeng Jing,

Tong Wu,

Tian Gao,

Zhihong Sheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinying and Jing, Junfeng and Wu, Tong and Gao, Tian and Sheng, Zhihong},
  title     = {{MegAD}: An Expert in Meta-Learning Guided Few-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2585--2595}
}
Bi-Level Optimization for Single Domain Generalization: Marzi Heidari,

Hanping Zhang,

Hao Yan,

Yuhong Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Heidari_2026_CVPR,
  author    = {Heidari, Marzi and Zhang, Hanping and Yan, Hao and Guo, Yuhong},
  title     = {Bi-Level Optimization for Single Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6685--6694}
}
NeVStereo: A NeRF-Driven NVS-Stereo Architecture for High-Fidelity 3D Tasks: Pengcheng Chen,

Yue Hu,

Wenhao Li,

Nicole M Gunderson,

Andrew Feng,

Zhenglong Sun,

Peter Beerel,

Eric J Seibel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengcheng and Hu, Yue and Li, Wenhao and Gunderson, Nicole M and Feng, Andrew and Sun, Zhenglong and Beerel, Peter and Seibel, Eric J},
  title     = {{NeVStereo}: A {NeRF}-Driven {NVS}-Stereo Architecture for High-Fidelity {3D} Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {403--413}
}
FLToM: Robust Federated Learning with Theory-of-Mind Structure: Tianshu Xiao,

Liu Yang,

Sichang Guo,

Qilong Wang,

Qinghua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Tianshu and Yang, Liu and Guo, Sichang and Wang, Qilong and Hu, Qinghua},
  title     = {{FLToM}: Robust Federated Learning with Theory-of-Mind Structure},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {2503--2513}
}
TalkVerse: Democratizing Minute-Long Audio-Driven Video Generation: Zhenzhi Wang,

Jian Wang,

Ke Ma,

Dahua Lin,

Bing Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhenzhi and Wang, Jian and Ma, Ke and Lin, Dahua and Zhou, Bing},
  title     = {{TalkVerse}: Democratizing Minute-Long Audio-Driven Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4516--4527}
}
RAD: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes: Michael Baltaxe,

Dan Levi,

Sagie Benaim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baltaxe_2026_CVPR,
  author    = {Baltaxe, Michael and Levi, Dan and Benaim, Sagie},
  title     = {{RAD}: Retrieval-Augmented Monocular Metric Depth Estimation for Underrepresented Classes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {558--568}
}
EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction: Xinan Zhang,

Muhammad Zubair Irshad,

Anthony Yezzi,

Yi-Chang Tsai,

Zsolt Kira; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xinan and Irshad, Muhammad Zubair and Yezzi, Anthony and Tsai, Yi-Chang and Kira, Zsolt},
  title     = {{EscherNet++}: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8846--8856}
}
G2I: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction: Chao Ning,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Chao and Yokoya, Naoto},
  title     = {{G2I}: Transitioning a Generalized Monocular Depth Estimation Model to In-Domain Metric Depth Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {518--527}
}
Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents: Nishad Singhi,

Christian Bialas,

Snehal Jauhri,

Vignesh Prasad,

Georgia Chalvatzaki,

Marcus Rohrbach,

Anna Rohrbach; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singhi_2026_CVPR,
  author    = {Singhi, Nishad and Bialas, Christian and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia and Rohrbach, Marcus and Rohrbach, Anna},
  title     = {Think Twice, Act Once: Verifier-Guided Action Selection For Embodied Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3124--3135}
}
Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution: Mingyu Choi,

Woo Kyoung Han,

Sunghoon Im,

Kyong Hwan Jin; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Mingyu and Han, Woo Kyoung and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4950--4960}
}
Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?: Zihao Dongfang,

Xu Zheng,

Ziqiao Weng,

Yuanhuiyi Lyu,

Danda Pani Paudel,

Luc Van Gool,

Kailun Yang,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dongfang_2026_CVPR,
  author    = {Dongfang, Zihao and Zheng, Xu and Weng, Ziqiao and Lyu, Yuanhuiyi and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun and Hu, Xuming},
  title     = {Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9759--9769}
}
RodNet: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement: Boheng Liu,

Ziyu Li,

Zhong Zhang,

Mengrui Xu,

Chenghua Duan,

Dehao Liu,

Qing Li,

Xia Wu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Boheng and Li, Ziyu and Zhang, Zhong and Xu, Mengrui and Duan, Chenghua and Liu, Dehao and Li, Qing and Wu, Xia},
  title     = {{RodNet}: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4961--4970}
}
SCOPE: Spatially Ordered Continual Learning for 3D Segmentation: Wenhao Xu,

Huaidong Zhang,

Weipeng Zhang,

Qianle Zhang,

Shengfeng He; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Wenhao and Zhang, Huaidong and Zhang, Weipeng and Zhang, Qianle and He, Shengfeng},
  title     = {{SCOPE}: Spatially Ordered Continual Learning for {3D} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7862--7871}
}; Back