CVPR 2026 Open Access Repository

Papers

Back
Choreographing a World of Dynamic Objects: Yanzhe Lyu,

Chen Geng,

Karthik Dharmarajan,

Yunzhi Zhang,

Hadi Alzayer,

Shangzhe Wu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Yanzhe and Geng, Chen and Dharmarajan, Karthik and Zhang, Yunzhi and Alzayer, Hadi and Wu, Shangzhe and Wu, Jiajun},
  title     = {Choreographing a World of Dynamic Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32647--32658},
}
HybridDriveVLA: Vision-Language-Action Model with Visual CoT reasoning and ToT Evaluation for Autonomous Driving: Yipene Cedric Francois Bassole,

Sungwoo Kim,

Jiwoo Jung,

Yunsick Sung; [pdf] [supp]
[bibtex]
@InProceedings{Bassole_2026_CVPR,
  author    = {Bassole, Yipene Cedric Francois and Kim, Sungwoo and Jung, Jiwoo and Sung, Yunsick},
  title     = {{HybridDriveVLA}: Vision-Language-Action Model with Visual {CoT} reasoning and {ToT} Evaluation for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32421--32430},
}
Catalyst4D: High-Fidelity 3D-to-4D Scene Editing via Dynamic Propagation: Shifeng Chen,

Yihui Li,

Jun Liao,

Hongyu Yang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shifeng and Li, Yihui and Liao, Jun and Yang, Hongyu and Huang, Di},
  title     = {{Catalyst4D}: High-Fidelity {3D-to-4D} Scene Editing via Dynamic Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29792--29802},
}
Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models: Zaishuo Xia,

Yukuan Lu,

Xinyi Li,

Yifan Xu,

Yubei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zaishuo and Lu, Yukuan and Li, Xinyi and Xu, Yifan and Chen, Yubei},
  title     = {Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32602--32612},
}
CME-CAD: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for CAD Code Generation: Ke Niu,

Haiyang Yu,

Zhuofan Chen,

Zhengtao Yao,

Weitao Jia,

Xiaodong Ge,

Jingqun Tang,

Benlei Cui,

Bin Li,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Ke and Yu, Haiyang and Chen, Zhuofan and Yao, Zhengtao and Jia, Weitao and Ge, Xiaodong and Tang, Jingqun and Cui, Benlei and Li, Bin and Xue, Xiangyang},
  title     = {{CME-CAD}: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for {CAD} Code Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39272--39281},
}
SynCLIP: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception: Mingjie Xie,

Guangjun He,

Dongli Xu,

Youtian Lin,

Hongjue Li,

Pengming Feng,

Jian Guan,

Yue Deng; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Mingjie and He, Guangjun and Xu, Dongli and Lin, Youtian and Li, Hongjue and Feng, Pengming and Guan, Jian and Deng, Yue},
  title     = {{SynCLIP}: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31524--31533},
}
More than the Sum: Panorama-Language Models for Adverse Omni-Scenes: Weijia Fan,

Ruiping Liu,

Jiale Wei,

Yufan Chen,

Junwei Zheng,

Zichao Zeng,

Jiaming Zhang,

Qiufu Li,

Linlin Shen,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Weijia and Liu, Ruiping and Wei, Jiale and Chen, Yufan and Zheng, Junwei and Zeng, Zichao and Zhang, Jiaming and Li, Qiufu and Shen, Linlin and Stiefelhagen, Rainer},
  title     = {More than the Sum: Panorama-Language Models for Adverse Omni-Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30874--30884},
}
Anomaly-Related Residual Fields for Cross-domain Anomaly Detection: Kewei Gao,

Jiayi Xie,

Zhengda Shen,

Weijun Qin,

Lingxiang Jia,

Kejia Chen,

Zunlei Feng,

Yijun Bei; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Kewei and Xie, Jiayi and Shen, Zhengda and Qin, Weijun and Jia, Lingxiang and Chen, Kejia and Feng, Zunlei and Bei, Yijun},
  title     = {Anomaly-Related Residual Fields for Cross-domain Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35617--35627},
}
DreamSR: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer: Qingji Dong,

Hang Dong,

Mingqin Chen,

Rui Zhang,

Yitong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qingji and Dong, Hang and Chen, Mingqin and Zhang, Rui and Wang, Yitong},
  title     = {{DreamSR}: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38258--38269},
}
VisRef: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models: Soumya Suvra Ghosal,

Youngeun Kim,

Zhuowei Li,

Ritwick Chaudhry,

Linghan Xu,

Hongjing Zhang,

Jakub Zablocki,

Yifan Xing,

Qin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosal_2026_CVPR,
  author    = {Ghosal, Soumya Suvra and Kim, Youngeun and Li, Zhuowei and Chaudhry, Ritwick and Xu, Linghan and Zhang, Hongjing and Zablocki, Jakub and Xing, Yifan and Zhang, Qin},
  title     = {{VisRef}: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33404--33414},
}
Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis: Yi He,

Lei Yang,

Bofan Chen,

Shilin Wang; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yi and Yang, Lei and Chen, Bofan and Wang, Shilin},
  title     = {Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35462--35471},
}
Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations: Wei-Jin Huang,

Yue-Yi Zhang,

Yi-Lin Wei,

Zhi-Wei Xia,

Juantao Tan,

Yuan-Ming Li,

Zhilin Zhao,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wei-Jin and Zhang, Yue-Yi and Wei, Yi-Lin and Xia, Zhi-Wei and Tan, Juantao and Li, Yuan-Ming and Zhao, Zhilin and Zheng, Wei-Shi},
  title     = {Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30740--30749},
}
Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation: Fatmaelzahraa Ahmed,

Zhihe Lu,

Gianni Caro,

Diram Tabaa,

Mohamed Hamdy,

Muraam Abdel-Ghani,

Abdulaziz Al-Ali,

Muhammad Arsalan,

Shidin Balakrishnan; [pdf] [supp]
[bibtex]
@InProceedings{Ahmed_2026_CVPR,
  author    = {Ahmed, Fatmaelzahraa and Lu, Zhihe and Caro, Gianni and Tabaa, Diram and Hamdy, Mohamed and Abdel-Ghani, Muraam and Al-Ali, Abdulaziz and Arsalan, Muhammad and Balakrishnan, Shidin},
  title     = {Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42137--42146},
}
Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights: Masafumi Mori,

Shinya Gongyo,

Mitsuru Ambai; [pdf] [supp]
[bibtex]
@InProceedings{Mori_2026_CVPR,
  author    = {Mori, Masafumi and Gongyo, Shinya and Ambai, Mitsuru},
  title     = {Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33609--33620},
}
PR-IQA: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis: Inseong Choi,

Siwoo Lee,

Seung-Hun Nam,

Soohwan Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Inseong and Lee, Siwoo and Nam, Seung-Hun and Song, Soohwan},
  title     = {{PR-IQA}: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37301--37310},
}
Fine-VAD: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning: Menghao Zhang,

Yiyan Zhu,

Pengfei Ren,

Haifeng Sun,

Qi Qi,

Zirui Zhuang,

Huazheng Wang,

Lei Zhang,

Jianxin Liao,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Menghao and Zhu, Yiyan and Ren, Pengfei and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Wang, Huazheng and Zhang, Lei and Liao, Jianxin and Wang, Jingyu},
  title     = {{Fine-VAD}: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35514--35523},
}
CausalVAD: De-confounding End-to-End Autonomous Driving via Causal Intervention: Jiacheng Tang,

Zhiyuan Zhou,

Zhuolin He,

Jia Zhang,

Kai Zhang,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jiacheng and Zhou, Zhiyuan and He, Zhuolin and Zhang, Jia and Zhang, Kai and Pu, Jian},
  title     = {{CausalVAD}: De-confounding End-to-End Autonomous Driving via Causal Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32124--32133},
}
ReGenHOI: Unifying Reconstruction and Generation for 3D Human-Object Interaction Understanding: Miao Xu,

Xiangyu Zhu,

Zidu Wang,

Xusheng Liang,

Bao Li,

Jinlin Wu,

Zelin Zang,

Zhen Lei; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Miao and Zhu, Xiangyu and Wang, Zidu and Liang, Xusheng and Li, Bao and Wu, Jinlin and Zang, Zelin and Lei, Zhen},
  title     = {{ReGenHOI}: Unifying Reconstruction and Generation for {3D} Human-Object Interaction Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42847--42857},
}
Masked Region Transformer for Layered Image Generation and Editing at Scale: Zhicong Tang,

Jingye Chen,

Zhao Zhang,

Mohan Zhou,

Yuchi Liu,

Yifan Pu,

Yalong Bai,

Ethan Smith,

Yuhui Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhicong and Chen, Jingye and Zhang, Zhao and Zhou, Mohan and Liu, Yuchi and Pu, Yifan and Bai, Yalong and Smith, Ethan and Yuan, Yuhui},
  title     = {Masked Region Transformer for Layered Image Generation and Editing at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40622--40632},
}
DROID-SLAM in the Wild: Moyang Li,

Zihan Zhu,

Marc Pollefeys,

Daniel Barath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Moyang and Zhu, Zihan and Pollefeys, Marc and Barath, Daniel},
  title     = {{DROID-SLAM} in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36498--36508},
}
Intrinsic Concept Extraction Based on Compositional Interpretability: Hanyu Shi,

Hong Tao,

Guoheng Huang,

Jianbin Jiang,

Xuhang Chen,

Chi-Man Pun,

Shanhu Wang,

Pan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hanyu and Tao, Hong and Huang, Guoheng and Jiang, Jianbin and Chen, Xuhang and Pun, Chi-Man and Wang, Shanhu and Pan, Pan},
  title     = {Intrinsic Concept Extraction Based on Compositional Interpretability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38969--38978},
}
Kontinuous Kontext: Continuous Strength Control for Instruction-based Image Editing: Rishubh Parihar,

Or Patashnik,

Daniil Ostashev,

Venkatesh Babu Radhakrishnan,

Daniel Cohen-Or,

Kuan-Chieh Jackson Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2026_CVPR,
  author    = {Parihar, Rishubh and Patashnik, Or and Ostashev, Daniil and Radhakrishnan, Venkatesh Babu and Cohen-Or, Daniel and Wang, Kuan-Chieh Jackson},
  title     = {{Kontinuous Kontext}: Continuous Strength Control for Instruction-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37929--37939},
}
ExtrinSplat: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in 3D Gaussian Splatting: Jiayu Ding,

Xinpeng Liu,

Zhiyi Pan,

Shiqiang Long,

Ge Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Jiayu and Liu, Xinpeng and Pan, Zhiyi and Long, Shiqiang and Li, Ge},
  title     = {{ExtrinSplat}: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31019--31028},
}
StyleDoctor: Towards Specialist Reward Model for Style-centric Generation Tasks: Xilin He,

Xiaole Xian,

Xiangyu Yue,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xilin and Xian, Xiaole and Yue, Xiangyu and Khan, Muhammad Haris},
  title     = {{StyleDoctor}: Towards Specialist Reward Model for Style-centric Generation Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29126--29135},
}
From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing: Ling Wang,

Yunfan Lu,

Wenzong Ma,

Huizai Yao,

Pengteng Li,

Hui Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ling and Lu, Yunfan and Ma, Wenzong and Yao, Huizai and Li, Pengteng and Xiong, Hui},
  title     = {From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34028--34039},
}
Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration: Jun Li,

Lizhi Xiong,

Ziqiang Li,

Weiwei Jiang,

Zhangjie Fu,

Yong Li,

Guo-Sen Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jun and Xiong, Lizhi and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie and Li, Yong and Xie, Guo-Sen},
  title     = {Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37653--37663},
}
FlowMotion: Training-Free Flow Guidance for Video Motion Transfer: Zhen Wang,

Youcan Xu,

Jun Xiao,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhen and Xu, Youcan and Xiao, Jun and Chen, Long},
  title     = {{FlowMotion}: Training-Free Flow Guidance for Video Motion Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38143--38153},
}
Visual Diffusion Models are Geometric Solvers: Nir Goren,

Shai Yehezkel,

Omer Dahary,

Andrey Voynov,

Or Patashnik,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goren_2026_CVPR,
  author    = {Goren, Nir and Yehezkel, Shai and Dahary, Omer and Voynov, Andrey and Patashnik, Or and Cohen-Or, Daniel},
  title     = {Visual Diffusion Models are Geometric Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43187--43196},
}
Image Diffusion Preview with Consistency Solver: Fu-Yun Wang,

Hao Zhou,

Liangzhe Yuan,

Sanghyun Woo,

Boqing Gong,

Bohyung Han,

Ming-Hsuan Yang,

Han Zhang,

Yukun Zhu,

Ting Liu,

Long Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Fu-Yun and Zhou, Hao and Yuan, Liangzhe and Woo, Sanghyun and Gong, Boqing and Han, Bohyung and Yang, Ming-Hsuan and Zhang, Han and Zhu, Yukun and Liu, Ting and Zhao, Long},
  title     = {Image Diffusion Preview with Consistency Solver},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43271--43280},
}
Resolving the Identity Crisis in Text-to-Image Generation: Shubhankar Borse,

Farzad Farhadzadeh,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Borse_2026_CVPR,
  author    = {Borse, Shubhankar and Farhadzadeh, Farzad and Hayat, Munawar and Porikli, Fatih},
  title     = {Resolving the Identity Crisis in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36703--36712},
}
High-Quality and Efficient Turbulence Mitigation with Events: Xiaoran Zhang,

Jian Ding,

Yuxing Duan,

Haoyue Liu,

Gang Chen,

Yi Chang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaoran and Ding, Jian and Duan, Yuxing and Liu, Haoyue and Chen, Gang and Chang, Yi and Yan, Luxin},
  title     = {High-Quality and Efficient Turbulence Mitigation with Events},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29514--29525},
}
SEA-Vision: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in Southeast Asia: Pengfei Yue,

Xingran Zhao,

Juntao Chen,

Peng Hou,

Wang Longchao,

Jianghang Lin,

Shengchuan Zhang,

Anxiang Zeng,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Pengfei and Zhao, Xingran and Chen, Juntao and Hou, Peng and Longchao, Wang and Lin, Jianghang and Zhang, Shengchuan and Zeng, Anxiang and Cao, Liujuan},
  title     = {{SEA-Vision}: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in {Southeast Asia}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30895--30905},
}
VITAL: Vision-Encoder-centered Pre-training for LMMs in Visual Quality Assessment: Ziheng Jia,

Linhan Cao,

Jinliang Han,

Zicheng Zhang,

Jiaying Qian,

Jiarui Wang,

Zijian Chen,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Ziheng and Cao, Linhan and Han, Jinliang and Zhang, Zicheng and Qian, Jiaying and Wang, Jiarui and Chen, Zijian and Zhai, Guangtao and Min, Xiongkuo},
  title     = {{VITAL}: Vision-Encoder-centered Pre-training for {LMMs} in Visual Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41245--41255},
}
Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection: Jiacheng Pi,

Zhiguo Yang,

Xingxing Huang,

Dongsheng Xu,

Ruizhi Zhong,

Wenjie Ruan; [pdf] [supp]
[bibtex]
@InProceedings{Pi_2026_CVPR,
  author    = {Pi, Jiacheng and Yang, Zhiguo and Huang, Xingxing and Xu, Dongsheng and Zhong, Ruizhi and Ruan, Wenjie},
  title     = {Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30087--30097},
}
SHARP: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting: Alexander Prutsch,

Christian Fruhwirth-Reisinger,

David Schinagl,

Horst Possegger; [pdf] [supp]
[bibtex]
@InProceedings{Prutsch_2026_CVPR,
  author    = {Prutsch, Alexander and Fruhwirth-Reisinger, Christian and Schinagl, David and Possegger, Horst},
  title     = {{SHARP}: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32103--32112},
}
SPAN: Spatial-Projection Alignment for Monocular 3D Object Detection: Yifan Wang,

Yian Zhao,

Fanqi Pu,

Xiaochen Yang,

Yang Tang,

Xi Chen,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifan and Zhao, Yian and Pu, Fanqi and Yang, Xiaochen and Tang, Yang and Chen, Xi and Yang, Wenming},
  title     = {{SPAN}: Spatial-Projection Alignment for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40762--40771},
}
Is the Modality Gap a Bug or a Feature? A Robustness Perspective: Rhea Chowers,

Oshri Naparstek,

Udi Barzelay,

Yair Weiss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowers_2026_CVPR,
  author    = {Chowers, Rhea and Naparstek, Oshri and Barzelay, Udi and Weiss, Yair},
  title     = {Is the Modality Gap a Bug or a Feature? {A} Robustness Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30288--30298},
}
Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models: Yang Li,

Jia-Li Yin,

Luojun Lin,

Wei Lin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Yin, Jia-Li and Lin, Luojun and Lin, Wei},
  title     = {Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30141--30150},
}
Explore with Long-term Memory: A Benchmark and Multimodal LLM-based Reinforcement Learning Framework for Embodied Exploration: Sen Wang,

Bangwei Liu,

Zhenkun Gao,

Lizhuang Ma,

Xuhong Wang,

Yuan Xie,

Xin Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sen and Liu, Bangwei and Gao, Zhenkun and Ma, Lizhuang and Wang, Xuhong and Xie, Yuan and Tan, Xin},
  title     = {Explore with Long-term Memory: A Benchmark and Multimodal {LLM}-based Reinforcement Learning Framework for Embodied Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37098--37108},
}
Multi-Modal Image Fusion via Intervention-Stable Feature Learning: Xue Wang,

Zheng Guan,

Wenhua Qian,

Chengchao Wang,

Runzhuo Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xue and Guan, Zheng and Qian, Wenhua and Wang, Chengchao and Ma, Runzhuo},
  title     = {Multi-Modal Image Fusion via Intervention-Stable Feature Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33827--33837},
}
Dark3R: Learning Structure from Motion in the Dark: Andrew Y. Guo,

Anagh Malik,

SaiKiran Tedla,

Yutong Dai,

Yiqian Qin,

Zach Salehe,

Benjamin Attal,

Sotiris Nousias,

Kiriakos N. Kutulakos,

David B. Lindell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Andrew Y. and Malik, Anagh and Tedla, SaiKiran and Dai, Yutong and Qin, Yiqian and Salehe, Zach and Attal, Benjamin and Nousias, Sotiris and Kutulakos, Kiriakos N. and Lindell, David B.},
  title     = {{Dark3R}: Learning Structure from Motion in the Dark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34160--34170},
}
Efficient and Training-Free Single-Image Diffusion Models: Haojun Qiu,

Kiriakos N. Kutulakos,

David B. Lindell; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Haojun and Kutulakos, Kiriakos N. and Lindell, David B.},
  title     = {Efficient and Training-Free Single-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36157--36167},
}
Detecting Compressed AI-Generated Images via Phase Spectrum Robustness: Kai Li,

Wenqi Ren,

Wei Wang,

Xiaochun Cao; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Kai and Ren, Wenqi and Wang, Wei and Cao, Xiaochun},
  title     = {Detecting Compressed {AI}-Generated Images via Phase Spectrum Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35427--35436},
}
Exposing and Evaluating Hallucinations for GUI Grounding: Zicheng Zhang,

Hongyi Jing,

Rui Lv,

Shuo Fang,

Shiai Zhu,

Junying Wang,

Chunyi Li,

Xiaohong Liu,

Chenguang Ma,

Guangtao Zhai; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zicheng and Jing, Hongyi and Lv, Rui and Fang, Shuo and Zhu, Shiai and Wang, Junying and Li, Chunyi and Liu, Xiaohong and Ma, Chenguang and Zhai, Guangtao},
  title     = {Exposing and Evaluating Hallucinations for {GUI} Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40208--40223},
}
Beyond Sequential Tools: A Unified VLM Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion: Honglin Xiong,

Chenjie Zhu,

Jianbiao Ding,

Zixuan Ni,

Wei Li,

Zhenpeng Mi,

Qian Wang; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Honglin and Zhu, Chenjie and Ding, Jianbiao and Ni, Zixuan and Li, Wei and Mi, Zhenpeng and Wang, Qian},
  title     = {Beyond Sequential Tools: A Unified {VLM} Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41521--41530},
}
LIBERO-Plus: A Progressive Robustness Benchmark for Visual-Language-Action Models: Senyu Fei,

Siyin Wang,

Junhao Shi,

Zihao Dai,

Jikun Cai,

Pengfang Qian,

Li Ji,

Xinzhe He,

Shiduo Zhang,

Zhaoye Fei,

Jinlan Fu,

Jingjing Gong,

Xipeng Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Fei_2026_CVPR,
  author    = {Fei, Senyu and Wang, Siyin and Shi, Junhao and Dai, Zihao and Cai, Jikun and Qian, Pengfang and Ji, Li and He, Xinzhe and Zhang, Shiduo and Fei, Zhaoye and Fu, Jinlan and Gong, Jingjing and Qiu, Xipeng},
  title     = {{LIBERO-Plus}: A Progressive Robustness Benchmark for Visual-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38574--38583},
}
MajutsuCity: Language-driven Aesthetic-adaptive City Generation with Controllable 3D Assets and Layouts: Zilong Huang,

Jun He,

Xiaobin Huang,

Ziyi Xiong,

Yang Luo,

Junyan Ye,

Weijia Li,

Yiping Chen,

Ting Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zilong and He, Jun and Huang, Xiaobin and Xiong, Ziyi and Luo, Yang and Ye, Junyan and Li, Weijia and Chen, Yiping and Han, Ting},
  title     = {{MajutsuCity}: Language-driven Aesthetic-adaptive City Generation with Controllable {3D} Assets and Layouts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31608--31618},
}
OpenDance: Multimodal Controllable 3D Dance Generation with Large-scale Internet Data: Jinlu Zhang,

Zixi Kang,

Libin Liu,

Jianlong Chang,

Qi Tian,

Feng Gao,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jinlu and Kang, Zixi and Liu, Libin and Chang, Jianlong and Tian, Qi and Gao, Feng and Wang, Yizhou},
  title     = {{OpenDance}: Multimodal Controllable {3D} Dance Generation with Large-scale {Internet} Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28860--28870},
}
Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings: Yunxiang Peng,

Mengmeng Ma,

Ziyu Yao,

Xi Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Yunxiang and Ma, Mengmeng and Yao, Ziyu and Peng, Xi},
  title     = {Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38936--38946},
}
AniMimic: Imitating 3D Animation from Video Priors: Tianyi Xie,

Yunuo Chen,

Yaowei Guo,

Yin Yang,

Bolei Zhou,

Demetri Terzopoulos,

Ying Jiang,

Chenfanfu Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tianyi and Chen, Yunuo and Guo, Yaowei and Yang, Yin and Zhou, Bolei and Terzopoulos, Demetri and Jiang, Ying and Jiang, Chenfanfu},
  title     = {{AniMimic}: Imitating {3D} Animation from Video Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40266--40276},
}
Reviving ConvNeXt for Efficient Convolutional Diffusion Models: Taesung Kwon,

Lorenzo Bianchi,

Lennart Wittke,

Felix Watine,

Fabio Carrara,

Jong Chul Ye,

Romann Weber,

Vinicius Azevedo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Taesung and Bianchi, Lorenzo and Wittke, Lennart and Watine, Felix and Carrara, Fabio and Ye, Jong Chul and Weber, Romann and Azevedo, Vinicius},
  title     = {Reviving {ConvNeXt} for Efficient Convolutional Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43675--43685},
}
DetAny4D: Detect Anything 4D Temporally in a Streaming RGB Video: Jiawei Hou,

Shenghao Zhang,

Can Wang,

Zheng Gu,

Yonggen Ling,

Taiping Zeng,

Xiangyang Xue,

Jingbo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Jiawei and Zhang, Shenghao and Wang, Can and Gu, Zheng and Ling, Yonggen and Zeng, Taiping and Xue, Xiangyang and Zhang, Jingbo},
  title     = {{DetAny4D}: Detect Anything {4D} Temporally in a Streaming {RGB} Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32798--32807},
}
Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search: Xinlei Yin,

Xiulian Peng,

Xiao Li,

Zhiwei Xiong,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Xinlei and Peng, Xiulian and Li, Xiao and Xiong, Zhiwei and Lu, Yan},
  title     = {Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32882--32891}
}
CaptionFormer: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects: Gabriel Fiastre,

Antoine Yang,

Cordelia Schmid; [pdf] [supp]
[bibtex]
@InProceedings{Fiastre_2026_CVPR,
  author    = {Fiastre, Gabriel and Yang, Antoine and Schmid, Cordelia},
  title     = {{CaptionFormer}: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39518--39528}
}
OnlinePG: Online Open-Vocabulary Panoptic Mapping with 3D Gaussian Splatting: Hongjia Zhai,

Qi Zhang,

Xiaokun Pan,

Xiyu Zhang,

Yitong Dong,

Huaqi Zhang,

Dan Xu,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2026_CVPR,
  author    = {Zhai, Hongjia and Zhang, Qi and Pan, Xiaokun and Zhang, Xiyu and Dong, Yitong and Zhang, Huaqi and Xu, Dan and Zhang, Guofeng},
  title     = {{OnlinePG}: Online Open-Vocabulary Panoptic Mapping with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33269--33279}
}
PyraTok: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation: Onkar Susladkar,

Tushar Prakash,

Adheesh Juvekar,

Kiet A. Nguyen,

Dong-Hwan Jang,

Inderjit S Dhillon,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Susladkar_2026_CVPR,
  author    = {Susladkar, Onkar and Prakash, Tushar and Juvekar, Adheesh and Nguyen, Kiet A. and Jang, Dong-Hwan and Dhillon, Inderjit S and Lourentzou, Ismini},
  title     = {{PyraTok}: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37906--37917}
}
UniGeoSeg: Towards Unified Open-World Segmentation for Geospatial Scenes: Shuo Ni,

Di Wang,

He Chen,

Haonan Guo,

Ning Zhang,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Shuo and Wang, Di and Chen, He and Guo, Haonan and Zhang, Ning and Zhang, Jing},
  title     = {{UniGeoSeg}: Towards Unified Open-World Segmentation for Geospatial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34865--34876}
}
ShiftLUT: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration: Xiaolong Zeng,

Yitong Yu,

Shiyao Xiong,

Jinhua Hao,

Ming Sun,

Chao Zhou,

Bin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Xiaolong and Yu, Yitong and Xiong, Shiyao and Hao, Jinhua and Sun, Ming and Zhou, Chao and Wang, Bin},
  title     = {{ShiftLUT}: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29959--29968}
}
MMSD3.0: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection: Haochen Zhao,

Yuyao Kong,

Yongxiu Xu,

Gaopeng Gou,

Hongbo Xu,

Yubin Wang,

Haoliang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Haochen and Kong, Yuyao and Xu, Yongxiu and Gou, Gaopeng and Xu, Hongbo and Wang, Yubin and Zhang, Haoliang},
  title     = {{MMSD3.0}: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37885--37895}
}
Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization: Weijian Su,

Songqian Zhang,

Yuqi Han,

Jian Zhuang,

Yongdong Huang,

Qiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Weijian and Zhang, Songqian and Han, Yuqi and Zhuang, Jian and Huang, Yongdong and Zhang, Qiang},
  title     = {Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41499--41509}
}
Beyond the Ground Truth: Enhanced Supervision for Image Restoration: Donghun Ryou,

Inju Ha,

Sanghyeok Chu,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ryou_2026_CVPR,
  author    = {Ryou, Donghun and Ha, Inju and Chu, Sanghyeok and Han, Bohyung},
  title     = {Beyond the Ground Truth: Enhanced Supervision for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29949--29958}
}
SineProject: Machine Unlearning for Stable Vision-Language Alignment: Arpit Garg,

Hemanth Saratchandran,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Arpit and Saratchandran, Hemanth and Lucey, Simon},
  title     = {{SineProject}: Machine Unlearning for Stable Vision-Language Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31735--31745}
}
Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models: Ruiyang Li,

Fang Liu,

Licheng Jiao,

Xinglin Xie,

Jiayao Hao,

Shuo Li,

Xu Liu,

Jingyi Yang,

Lingling Li,

Puhua Chen,

Wenping Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ruiyang and Liu, Fang and Jiao, Licheng and Xie, Xinglin and Hao, Jiayao and Li, Shuo and Liu, Xu and Yang, Jingyi and Li, Lingling and Chen, Puhua and Ma, Wenping},
  title     = {Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30011--30020}
}
Language Models Can Explain Visual Features via Steering: Javier Ferrando,

Enrique Lopez-Cuena,

Pablo Agustin Martin-Torres,

Daniel Hinjos,

Anna Arias-Duart,

Dario Garcia-Gasulla; [pdf] [supp]
[bibtex]
@InProceedings{Ferrando_2026_CVPR,
  author    = {Ferrando, Javier and Lopez-Cuena, Enrique and Martin-Torres, Pablo Agustin and Hinjos, Daniel and Arias-Duart, Anna and Garcia-Gasulla, Dario},
  title     = {Language Models Can Explain Visual Features via Steering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38947--38958}
}
PanDA: Unsupervised Domain Adaptation for Multimodal 3D Panoptic Segmentation in Autonomous Driving: Yining Pan,

Shijie Li,

Yuchen Wu,

Xulei Yang,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Yining and Li, Shijie and Wu, Yuchen and Yang, Xulei and Zhao, Na},
  title     = {{PanDA}: Unsupervised Domain Adaptation for Multimodal {3D} Panoptic Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33057--33067}
}
ReCoFuse: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling: Hao Zhang,

Shuhan Yang,

Linfeng Tang,

Xunpeng Yi,

Jiayi Ma; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hao and Yang, Shuhan and Tang, Linfeng and Yi, Xunpeng and Ma, Jiayi},
  title     = {{ReCoFuse}: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33838--33847}
}
The Drift Kernel: Why Diffusion Models Change Even When Told Not To: Gokul Srinath Seetha Ram,

Rashmi Elavazhagan; [pdf] [supp]
[bibtex]
@InProceedings{Ram_2026_CVPR,
  author    = {Ram, Gokul Srinath Seetha and Elavazhagan, Rashmi},
  title     = {The Drift Kernel: Why Diffusion Models Change Even When Told Not To},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43281--43289}
}
Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction: Kepan Nan,

Wangbo Zhao,

Penghao Zhou,

Jun Li,

Zhenheng Yang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Nan_2026_CVPR,
  author    = {Nan, Kepan and Zhao, Wangbo and Zhou, Penghao and Li, Jun and Yang, Zhenheng and Yang, Jian and Tai, Ying},
  title     = {Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43740--43750}
}
Personalized Federated Training of Diffusion Models with Privacy Guarantees: Kumar Kshitij Patel,

Bingqing Jiang,

A F M Mahfuzul Kabir,

Weitong Zhang,

Difan Zou,

Lingxiao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Patel_2026_CVPR,
  author    = {Patel, Kumar Kshitij and Jiang, Bingqing and Kabir, A F M Mahfuzul and Zhang, Weitong and Zou, Difan and Wang, Lingxiao},
  title     = {Personalized Federated Training of Diffusion Models with Privacy Guarantees},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31790--31801}
}
KaLOS finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks: David Tschirschwitz,

Volker Rodehorst; [pdf] [supp]
[bibtex]
@InProceedings{Tschirschwitz_2026_CVPR,
  author    = {Tschirschwitz, David and Rodehorst, Volker},
  title     = {{KaLOS} finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38554--38563}
}
Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation: Rajeev Ranjan Dwivedi,

Anshuman Dangwal,

Vinod K Kurmi; [pdf] [supp]
[bibtex]
@InProceedings{Dwivedi_2026_CVPR,
  author    = {Dwivedi, Rajeev Ranjan and Dangwal, Anshuman and Kurmi, Vinod K},
  title     = {Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31683--31692}
}
Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework: Jinrong Zhang,

Zhaoyang Xu,

Xusheng He,

Xinrui Li,

Na Zheng,

Jianlong Wu; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jinrong and Xu, Zhaoyang and He, Xusheng and Li, Xinrui and Zheng, Na and Wu, Jianlong},
  title     = {Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33531--33541}
}
Cross-Modal Attention Calibration for LVLM Hallucination Mitigation: Jiaming Li,

Jiacheng Zhang,

Zequn Jie,

Lin Ma,

Ming Li,

Xiaonan Luo,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiaming and Zhang, Jiacheng and Jie, Zequn and Ma, Lin and Li, Ming and Luo, Xiaonan and Li, Guanbin},
  title     = {Cross-Modal Attention Calibration for {LVLM} Hallucination Mitigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40186--40196}
}
Human Geometry Distribution for 3D Animation Generation: Xiangjun Tang,

Biao Zhang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Xiangjun and Zhang, Biao and Wonka, Peter},
  title     = {Human Geometry Distribution for {3D} Animation Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38313--38323}
}
Moving Border Ownership for Event-based Motion Segmentation: Zhiyuan Hua,

Cornelia Fermüller,

Yiannis Aloimonos; [pdf] [supp]
[bibtex]
@InProceedings{Hua_2026_CVPR,
  author    = {Hua, Zhiyuan and Ferm{\"u}ller, Cornelia and Aloimonos, Yiannis},
  title     = {Moving Border Ownership for Event-based Motion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37043--37052}
}
Towards Visual Query Localization in the 3D World: Liang Peng,

Bohan Tan,

Zhipeng Zhang,

Haobo Li,

Yifan Jiao,

Xingping Dong,

Libo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Liang and Tan, Bohan and Zhang, Zhipeng and Li, Haobo and Jiao, Yifan and Dong, Xingping and Zhang, Libo},
  title     = {Towards Visual Query Localization in the {3D} World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41406--41415}
}
What Is It Like to Be a Noise? An Entropy-based Gaussian Noise Regularization for Diffusion Models: Pascal Chang,

Kai Lascheit,

Jingwei Tang,

Markus Gross,

Vinicius C. Azevedo; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Pascal and Lascheit, Kai and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius C.},
  title     = {What Is It Like to Be a Noise? An Entropy-based {Gaussian} Noise Regularization for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43471--43481}
}
PhysVid: Physics Aware Local Conditioning for Generative Video Models: Saurabh Pathak,

Elahe Arani,

Mykola Pechenizkiy,

Bahram Zonooz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathak_2026_CVPR,
  author    = {Pathak, Saurabh and Arani, Elahe and Pechenizkiy, Mykola and Zonooz, Bahram},
  title     = {{PhysVid}: Physics Aware Local Conditioning for Generative Video Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41847--41858}
}
Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing: Honglu Zhang,

Zhiqin Fang,

Ningning Zhao,

Saihui Hou,

Long Ma,

Renwang Pei,

Zhaofeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Honglu and Fang, Zhiqin and Zhao, Ningning and Hou, Saihui and Ma, Long and Pei, Renwang and He, Zhaofeng},
  title     = {Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33566--33576}
}
ArtLLM: Generating Articulated Assets via 3D LLM: Penghao Wang,

Siyuan Xie,

Hongyu Yan,

Xianghui Yang,

Jingwei Huang,

Chunchao Guo,

Jiayuan Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Penghao and Xie, Siyuan and Yan, Hongyu and Yang, Xianghui and Huang, Jingwei and Guo, Chunchao and Gu, Jiayuan},
  title     = {{ArtLLM}: Generating Articulated Assets via {3D} {LLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34281--34291}
}
DDT: Decoupled Diffusion Transformer: Shuai Wang,

Zhi Tian,

Weilin Huang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shuai and Tian, Zhi and Huang, Weilin and Wang, Limin},
  title     = {{DDT}: Decoupled Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40633--40642}
}
Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification: JooHyung Oh,

Minyoung Oh,

Sung Whan Yoon,

Jae-Young Sim; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, JooHyung and Oh, Minyoung and Yoon, Sung Whan and Sim, Jae-Young},
  title     = {Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40458--40467}
}
Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View Gaussian Splatting: Kaiqiang Xiong,

Rui Peng,

Jiahao Wu,

Zhanke Wang,

Jie Liang,

Xiaoyun Zheng,

Feng Gao,

Ronggang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Kaiqiang and Peng, Rui and Wu, Jiahao and Wang, Zhanke and Liang, Jie and Zheng, Xiaoyun and Gao, Feng and Wang, Ronggang},
  title     = {Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40918--40928}
}
SpatialStack: Layered Geometry-Language Fusion for 3D VLM Spatial Reasoning: Jian Zhang,

Shijie Zhou,

Bangya Liu,

Achuta Kadambi,

Zhiwen Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jian and Zhou, Shijie and Liu, Bangya and Kadambi, Achuta and Fan, Zhiwen},
  title     = {{SpatialStack}: Layered Geometry-Language Fusion for {3D} {VLM} Spatial Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38678--38688}
}
Hint2Gen: Bridging Understanding and Generation via Code-structured Hints: Yuanpeng Tu,

Yunpeng Chen,

Xi Chen,

Liang Li,

Hengshuang Zhao; [pdf]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Yuanpeng and Chen, Yunpeng and Chen, Xi and Li, Liang and Zhao, Hengshuang},
  title     = {{Hint2Gen}: Bridging Understanding and Generation via Code-structured Hints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36593--36603}
}
Goldilocks Test Sets for Face Verification: Haiyu Wu,

Sicong Tian,

Aman Bhatta,

Jacob Gutierrez,

Grace Bezold,

Genesis Argueta,

Karl Ricanek,

Michael C. King,

Kevin Bowyer; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haiyu and Tian, Sicong and Bhatta, Aman and Gutierrez, Jacob and Bezold, Grace and Argueta, Genesis and Ricanek, Karl and King, Michael C. and Bowyer, Kevin},
  title     = {Goldilocks Test Sets for Face Verification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35504--35513}
}
Abstract 3D Perception for Spatial Intelligence in Vision-Language Models: Yifan Liu,

Fangneng Zhan,

Kaichen Zhou,

Yilun Du,

Paul Pu Liang,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yifan and Zhan, Fangneng and Zhou, Kaichen and Du, Yilun and Liang, Paul Pu and Pfister, Hanspeter},
  title     = {Abstract {3D} Perception for Spatial Intelligence in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38647--38656}
}
Learning to Infer Parameterized Representations of Plants from 3D Scans: Samara Ghrer,

Christophe Godin,

Stefanie Wuhrer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghrer_2026_CVPR,
  author    = {Ghrer, Samara and Godin, Christophe and Wuhrer, Stefanie},
  title     = {Learning to Infer Parameterized Representations of Plants from {3D} Scans},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42236--42245}
}
Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection: Dragos-Alexandru Boldisor,

Stefan Smeu,

Dan Oneata,

Elisabeta Oneata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boldisor_2026_CVPR,
  author    = {Boldisor, Dragos-Alexandru and Smeu, Stefan and Oneata, Dan and Oneata, Elisabeta},
  title     = {Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43018--43029}
}
TF-SSD: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection: Zhijin He,

Shuo Jin,

Siyue Yu,

Shuwei Wu,

Bingfeng Zhang,

Li Yu,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Zhijin and Jin, Shuo and Yu, Siyue and Wu, Shuwei and Zhang, Bingfeng and Yu, Li and Xiao, Jimin},
  title     = {{TF-SSD}: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32216--32225}
}
Structure-to-Intensity Diffusion for Adverse-Weather LiDAR Generation: Peiyang Ni,

Longyu Yang,

Lu Zhang,

Kuniaki Saito,

Yap-Peng Tan,

Fumin Shen,

Heng Tao Shen,

Xiaofeng Zhu,

Ping Hu; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Peiyang and Yang, Longyu and Zhang, Lu and Saito, Kuniaki and Tan, Yap-Peng and Shen, Fumin and Shen, Heng Tao and Zhu, Xiaofeng and Hu, Ping},
  title     = {Structure-to-Intensity Diffusion for Adverse-Weather {LiDAR} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35904--35914}
}
AV-Reasoner: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for MLLMs: Lidong Lu,

Guo Chen,

Zhu Wei,

Zhiqi Li,

Yicheng Liu,

Tong Lu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Lidong and Chen, Guo and Wei, Zhu and Li, Zhiqi and Liu, Yicheng and Lu, Tong},
  title     = {{AV-Reasoner}: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33477--33487}
}
WRIVINDER: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery: Chandrakanth Gudavalli,

Tajuddin Manhar Mohammed,

Abhay Yadav,

Ananth Vishnu Bhaskar,

Hardik Prajapati,

Cheng Peng,

Rama Chellappa,

Shivkumar Chandrasekaran,

B.S. Manjunath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gudavalli_2026_CVPR,
  author    = {Gudavalli, Chandrakanth and Mohammed, Tajuddin Manhar and Yadav, Abhay and Bhaskar, Ananth Vishnu and Prajapati, Hardik and Peng, Cheng and Chellappa, Rama and Chandrasekaran, Shivkumar and Manjunath, B. S.},
  title     = {{WRIVINDER}: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33703--33713}
}
VIAFormer: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement: Tiancheng Fang,

Bowen Pan,

Lingxi Chen,

Jiangjing Lyu,

Chengfei Lv,

Chaoyue Niu,

Fan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Tiancheng and Pan, Bowen and Chen, Lingxi and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Wu, Fan},
  title     = {{VIAFormer}: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29060--29070}
}
End-to-End Language-Action Model for Humanoid Whole Body Control: Yuxuan Wang,

Haobin Jiang,

Shiqing Yao,

Ziluo Ding,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuxuan and Jiang, Haobin and Yao, Shiqing and Ding, Ziluo and Lu, Zongqing},
  title     = {End-to-End Language-Action Model for Humanoid Whole Body Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38399--38409}
}
IDESplat: Iterative Depth Probability Estimation for Generalizable 3D Gaussian Splatting: Wei Long,

Haifeng Wu,

Shiyin Jiang,

Jinhua Zhang,

Xinchun Ji,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Wei and Wu, Haifeng and Jiang, Shiyin and Zhang, Jinhua and Ji, Xinchun and Gu, Shuhang},
  title     = {{IDESplat}: Iterative Depth Probability Estimation for Generalizable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33248--33258}
}
Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning: Uddin Md. Borhan,

Arif Raza,

Zhiliang Lin,

Lu Wang,

Jianqiang Li,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Borhan_2026_CVPR,
  author    = {Borhan, Uddin Md. and Raza, Arif and Lin, Zhiliang and Wang, Lu and Li, Jianqiang and Chen, Jie},
  title     = {Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32134--32143}
}
LacTokGen: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens: Qingsong Xie,

Luyuan Zhang,

Zhao Zhang,

Siyuan Li,

Zhe Huang,

Zhenyu Yang,

Haonan Lu; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qingsong and Zhang, Luyuan and Zhang, Zhao and Li, Siyuan and Huang, Zhe and Yang, Zhenyu and Lu, Haonan},
  title     = {{LacTokGen}: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30368--30380}
}
BEV-SLD: Self-Supervised Scene Landmark Detection for Global Localization with LiDAR Bird's-Eye View Images: David Skuddis,

Vincent Ress,

Wei Zhang,

Vincent Ofosu Nyako,

Norbert Haala; [pdf] [supp]
[bibtex]
@InProceedings{Skuddis_2026_CVPR,
  author    = {Skuddis, David and Ress, Vincent and Zhang, Wei and Nyako, Vincent Ofosu and Haala, Norbert},
  title     = {{BEV-SLD}: Self-Supervised Scene Landmark Detection for Global Localization with {LiDAR} Bird's-Eye View Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31400--31409}
}
Dejavu: Towards Experience Feedback Learning for Embodied Intelligence: Shaokai Wu,

Yanbiao Ji,

Qiuchang Li,

Zhiyi Zhang,

Qichen He,

Wenyuan Xie,

Guodong Zhang,

Bayram Bayramli,

Yue Ding,

Hongtao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Shaokai and Ji, Yanbiao and Li, Qiuchang and Zhang, Zhiyi and He, Qichen and Xie, Wenyuan and Zhang, Guodong and Bayramli, Bayram and Ding, Yue and Lu, Hongtao},
  title     = {Dejavu: Towards Experience Feedback Learning for Embodied Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29578--29587}
}
Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective: Bolin Lai,

XuDong Wang,

Saketh Rambhatla,

James M. Rehg,

Zsolt Kira,

Rohit Girdhar,

Ishan Misra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Bolin and Wang, XuDong and Rambhatla, Saketh and Rehg, James M. and Kira, Zsolt and Girdhar, Rohit and Misra, Ishan},
  title     = {Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43450--43460}
}
CORE: Compact Object-centric REpresentations as a New Paradigm for Token Merging in LVLMs: Jingyu Lei,

Gaoang Wang,

Der-Horng Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Jingyu and Wang, Gaoang and Lee, Der-Horng},
  title     = {{CORE}: Compact Object-centric {REpresentations} as a New Paradigm for Token Merging in {LVLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39593--39605}
}
MacTok: Robust Continuous Tokenization for Image Generation: Hengyu Zeng,

Xin Gao,

Guanghao Li,

Yuxiang Yan,

Jiaoyang Ruan,

Junpeng Ma,

Haoyu Albert Wang,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Hengyu and Gao, Xin and Li, Guanghao and Yan, Yuxiang and Ruan, Jiaoyang and Ma, Junpeng and Wang, Haoyu Albert and Pu, Jian},
  title     = {{MacTok}: Robust Continuous Tokenization for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43407--43417}
}
MarkushGrapher-2: End-to-end Multimodal Recognition of Chemical Structures: Tim Strohmeyer,

Lucas Morin,

Gerhard Ingmar Meijer,

Valery Weber,

Ahmed Nassar,

Peter Staar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Strohmeyer_2026_CVPR,
  author    = {Strohmeyer, Tim and Morin, Lucas and Meijer, Gerhard Ingmar and Weber, Valery and Nassar, Ahmed and Staar, Peter},
  title     = {{MarkushGrapher-2}: End-to-end Multimodal Recognition of Chemical Structures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31176--31185}
}
Scene-VLM: Multimodal Video Scene Segmentation via Vision-Language Models: Nimrod Berman,

Adam Botach,

Emanuel Ben-Baruch,

Shunit Haviv Hakimi,

Asaf Gendler,

Ilan Naiman,

Erez Yosef,

Igor Kviatkovsky; [pdf] [supp]
[bibtex]
@InProceedings{Berman_2026_CVPR,
  author    = {Berman, Nimrod and Botach, Adam and Ben-Baruch, Emanuel and Hakimi, Shunit Haviv and Gendler, Asaf and Naiman, Ilan and Yosef, Erez and Kviatkovsky, Igor},
  title     = {{Scene-VLM}: Multimodal Video Scene Segmentation via Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39848--39857}
}
CICA: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis: Haoyu Jiang,

Xiaoliang Chen,

Duoqian Miao,

Xiaolin Qin,

Xianyong Li,

Yajun Du; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haoyu and Chen, Xiaoliang and Miao, Duoqian and Qin, Xiaolin and Li, Xianyong and Du, Yajun},
  title     = {{CICA}: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37842--37851}
}
LongVideo-R1: Smart Navigation for Low-cost Long Video Understanding: Jihao Qiu,

Lingxi Xie,

Xinyue Huo,

Qi Tian,

Qixiang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
    author    = {Qiu, Jihao and Xie, Lingxi and Huo, Xinyue and Tian, Qi and Ye, Qixiang},
    title     = {{LongVideo-R1}: Smart Navigation for Low-cost Long Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40505--40515}
}
HyCal: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning: Eunju Lee,

MiHyeon Kim,

JuneHyoung Kwon,

Yoonji Lee,

JiHyun Kim,

Soojin Jang,

YoungBin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Eunju and Kim, MiHyeon and Kwon, JuneHyoung and Lee, Yoonji and Kim, JiHyun and Jang, Soojin and Kim, YoungBin},
    title     = {{HyCal}: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29462--29471}
}
HDR-VLM: HDR-Domain Adaptation of VLMs and Preference-Aligned Quality Assessment for HDR Video Color Grading: Hao Yuan,

Jiabin Zhang,

Yajing Wu,

Ruixuan Pang,

Jing Li; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
    author    = {Yuan, Hao and Zhang, Jiabin and Wu, Yajing and Pang, Ruixuan and Li, Jing},
    title     = {{HDR-VLM}: {HDR}-Domain Adaptation of {VLMs} and Preference-Aligned Quality Assessment for {HDR} Video Color Grading},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40792--40801}
}
ReMoRa: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding: Daichi Yashima,

Shuhei Kurita,

Yusuke Oda,

Komei Sugiura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yashima_2026_CVPR,
    author    = {Yashima, Daichi and Kurita, Shuhei and Oda, Yusuke and Sugiura, Komei},
    title     = {{ReMoRa}: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31845--31855}
}
Seeing without Pixels: Perception from Camera Trajectories: Zihui Xue,

Kristen Grauman,

Dima Damen,

Andrew Zisserman,

Tengda Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Zihui and Grauman, Kristen and Damen, Dima and Zisserman, Andrew and Han, Tengda},
    title     = {Seeing without Pixels: Perception from Camera Trajectories},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38836--38847}
}
A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling: Jianlu Shen,

Fu Feng,

Jiaze Xu,

Yucheng Xie,

Jiaqi Lv,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
    author    = {Shen, Jianlu and Feng, Fu and Xu, Jiaze and Xie, Yucheng and Lv, Jiaqi and Geng, Xin},
    title     = {A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34535--34545}
}
MoECLIP: Patch-Specialized Experts for Zero-shot Anomaly Detection: Jun Yeong Park,

JunYoung Seo,

Minji Kang,

Yu Rang Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Jun Yeong and Seo, JunYoung and Kang, Minji and Park, Yu Rang},
    title     = {{MoECLIP}: Patch-Specialized Experts for Zero-shot Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35534--35544}
}
SLARM: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes: Zhicheng Qiu,

Jiarui Meng,

Tong-an Luo,

Yican Huang,

Xuan Feng,

Xuanfu Li,

Zhan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
    author    = {Qiu, Zhicheng and Meng, Jiarui and Luo, Tong-an and Huang, Yican and Feng, Xuan and Li, Xuanfu and Xu, Zhan},
    title     = {{SLARM}: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29023--29034}
}
Unified Camera Positional Encoding for Controlled Video Generation: Cheng Zhang,

Boying Li,

Meng Wei,

Yan-Pei Cao,

Camilo Gambardella,

Dinh Phung,

Jianfei Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Cheng and Li, Boying and Wei, Meng and Cao, Yan-Pei and Gambardella, Camilo and Phung, Dinh and Cai, Jianfei},
    title     = {Unified Camera Positional Encoding for Controlled Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38027--38037}
}
Toward Low-Cost yet Effective Temporal Learning for UAV Tracking: Chaocan Xue,

Qihua Liang,

Bineng Zhong,

Yanting Zu,

Yuanliang Xue,

Haiying Xia,

Shuxiang Song; [pdf]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Chaocan and Liang, Qihua and Zhong, Bineng and Zu, Yanting and Xue, Yuanliang and Xia, Haiying and Song, Shuxiang},
    title     = {Toward Low-Cost yet Effective Temporal Learning for {UAV} Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42538--42548}
}
Rethinking Intermediate Representation for VLM-based Robot Manipulation: Weiliang Tang,

Jialin Gao,

Jia-Hui Pan,

Gang Wang,

Li Erran Li,

Yun-Hui Liu,

Mingyu Ding,

Pheng-Ann Heng,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Weiliang and Gao, Jialin and Pan, Jia-Hui and Wang, Gang and Li, Li Erran and Liu, Yun-Hui and Ding, Mingyu and Heng, Pheng-Ann and Fu, Chi-Wing},
    title     = {Rethinking Intermediate Representation for {VLM}-based Robot Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29652--29662}
}
Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization: Mohammadjavad Matinkia,

Nilanjan Ray; [pdf] [supp]
[bibtex]
@InProceedings{Matinkia_2026_CVPR,
    author    = {Matinkia, Mohammadjavad and Ray, Nilanjan},
    title     = {Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28775--28785}
}
Driving on Registers: Ellington Kirby,

Alexandre Boulch,

Yihong Xu,

Yuan Yin,

Gilles Puy,

Éloi Zablocki,

Andrei Bursuc,

Spyros Gidaris,

Renaud Marlet,

Florent Bartoccioni,

Anh-Quan Cao,

Nermin Samet,

Tuan-Hung VU,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kirby_2026_CVPR,
    author    = {Kirby, Ellington and Boulch, Alexandre and Xu, Yihong and Yin, Yuan and Puy, Gilles and Zablocki, {\'E}loi and Bursuc, Andrei and Gidaris, Spyros and Marlet, Renaud and Bartoccioni, Florent and Cao, Anh-Quan and Samet, Nermin and VU, Tuan-Hung and Cord, Matthieu},
    title     = {Driving on Registers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32058--32069}
}
Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis: Pei Liu,

Xiangxiang Zeng,

Tengfei Ma,

Yucheng Xing,

Xuanbai Ren,

Yiping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Pei and Zeng, Xiangxiang and Ma, Tengfei and Xing, Yucheng and Ren, Xuanbai and Liu, Yiping},
    title     = {Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35238--35247}
}
TokenTrace: Multi-Concept Attribution through Watermarked Token Recovery: Li Zhang,

Shruti Agarwal,

John Collomosse,

Pengtao Xie,

Vishal Asnani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Li and Agarwal, Shruti and Collomosse, John and Xie, Pengtao and Asnani, Vishal},
    title     = {{TokenTrace}: Multi-Concept Attribution through Watermarked Token Recovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42942--42952}
}
MAD: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models: Sangyun Chung,

Se Yeon Kim,

Youngchae Chee,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR,
    author    = {Chung, Sangyun and Kim, Se Yeon and Chee, Youngchae and Ro, Yong Man},
    title     = {{MAD}: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40175--40185}
}
MMR-AD: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models: Xincheng Yao,

Zefeng Qian,

Chao Shi,

Jiayang Song,

Chongyang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
    author    = {Yao, Xincheng and Qian, Zefeng and Shi, Chao and Song, Jiayang and Zhang, Chongyang},
    title     = {{MMR-AD}: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43072--43082}
}
DeltaQuant: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing: Xingyang Li,

Samuel Tesfai,

Zhekai Zhang,

Haocheng Xi,

Shuo Yang,

Lvmin Zhang,

Yufei Sun,

Kelly Peng,

Maneesh Agrawala,

Ion Stoica,

Kurt Keutzer,

Jun-Yan Zhu,

Song Han,

Yujun Lin,

Muyang Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xingyang and Tesfai, Samuel and Zhang, Zhekai and Xi, Haocheng and Yang, Shuo and Zhang, Lvmin and Sun, Yufei and Peng, Kelly and Agrawala, Maneesh and Stoica, Ion and Keutzer, Kurt and Zhu, Jun-Yan and Han, Song and Lin, Yujun and Li, Muyang},
    title     = {{DeltaQuant}: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43578--43588}
}
Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance: June Suk Choi,

Kyungmin Lee,

Sihyun Yu,

Yisol Choi,

Jinwoo Shin,

Kimin Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, June Suk and Lee, Kyungmin and Yu, Sihyun and Choi, Yisol and Shin, Jinwoo and Lee, Kimin},
    title     = {Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40364--40375}
}
Otil: Accelerating Diffusion Model Inference via Communication-Efficient Multi-GPU Parallelism: Xin Li,

Shujun Tian,

Tao Lu,

Han Bao,

Zonghui Wang,

Wenzhi Chen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xin and Tian, Shujun and Lu, Tao and Bao, Han and Wang, Zonghui and Chen, Wenzhi},
    title     = {{Otil}: Accelerating Diffusion Model Inference via Communication-Efficient Multi-{GPU} Parallelism},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38487--38497}
}
AnomalyVFM -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors: Matic Fučka,

Vitjan Zavrtanik,

Danijel Skočaj; [pdf] [supp]
[bibtex]
@InProceedings{Fucka_2026_CVPR,
    author    = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel},
    title     = {{AnomalyVFM} -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35555--35566}
}
VDE: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation: Junwen Tan,

Jinglin Liang,

Hongyuan Chen,

Shuangping Huang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Junwen and Liang, Jinglin and Chen, Hongyuan and Huang, Shuangping},
    title     = {{VDE}: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37918--37928}
}
TreeTeaming: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration: Chunxiao Li,

Lijun Li,

Jing Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chunxiao and Li, Lijun and Shao, Jing},
    title     = {{TreeTeaming}: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37684--37693}
}
Learning Latent Concepts for Detecting Out-of-Distribution Objects: Ting Peng,

Junhao Dong,

Yew-Soon Ong; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Ting and Dong, Junhao and Ong, Yew-Soon},
    title     = {Learning Latent Concepts for Detecting Out-of-Distribution Objects},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28723--28733}
}
HiCoGen: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning: Hongji Yang,

Yucheng Zhou,

Wencheng Han,

Runzhou Tao,

Zhongying Qiu,

Jianfei Yang,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Hongji and Zhou, Yucheng and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing},
    title     = {{HiCoGen}: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36793--36802}
}
QUANTIPHY: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models: Li Puyin,

Tiange Xiang,

Ella Mao,

Shirley Wei,

Xinye Chen,

Adnan Masood,

Li Fei-Fei,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Puyin_2026_CVPR,
    author    = {Puyin, Li and Xiang, Tiange and Mao, Ella and Wei, Shirley and Chen, Xinye and Masood, Adnan and Li, Fei-Fei and Adeli, Ehsan},
    title     = {{QUANTIPHY}: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33174--33184}
}
From None to All: Self-Supervised 3D Reconstruction via Novel View Synthesis: Ranran Huang,

Weixun Luo,

Ye Mao,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Ranran and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
    title     = {From None to All: Self-Supervised {3D} Reconstruction via Novel View Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37358--37369}
}
PLACID: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories: Gemma Canet Tarrés,

Manel Baradad,

Francesc Moreno-Noguer,

Yumeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Tarres_2026_CVPR,
    author    = {Tarr{\'e}s, Gemma Canet and Baradad, Manel and Moreno-Noguer, Francesc and Li, Yumeng},
    title     = {{PLACID}: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38060--38070}
}
Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis: Chen Feng,

Zhuo Zhi,

Zhao Huang,

Jiawei Ge,

Ling Xiao,

Nicu Sebe,

Georgios Tzimiropoulos,

Ioannis Patras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Chen and Zhi, Zhuo and Huang, Zhao and Ge, Jiawei and Xiao, Ling and Sebe, Nicu and Tzimiropoulos, Georgios and Patras, Ioannis},
    title     = {Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34512--34523}
}
Temporal Interaction in Spiking Transformers with Multi-Delay Mixer: Kexin Shi,

Hanwen Liu,

Zeyang Song,

Yang Liu,

Jieyuan Zhang,

Shuai Wang,

Jibin Wu,

Malu Zhang,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Kexin and Liu, Hanwen and Song, Zeyang and Liu, Yang and Zhang, Jieyuan and Wang, Shuai and Wu, Jibin and Zhang, Malu and Yang, Yang},
    title     = {Temporal Interaction in Spiking Transformers with Multi-Delay Mixer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34555--34565}
}
Sketch2Colab: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation: Divyanshu Daiya,

Aniket Bera; [pdf] [arXiv]
[bibtex]
@InProceedings{Daiya_2026_CVPR,
    author    = {Daiya, Divyanshu and Bera, Aniket},
    title     = {{Sketch2Colab}: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30675--30685}
}
Efficient Equivariant Transformer for Self-Driving Agent Modeling: Scott Xu,

Dian Chen,

Kelvin Wong,

Chris Zhang,

Kion Fallah,

Raquel Urtasun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Scott and Chen, Dian and Wong, Kelvin and Zhang, Chris and Fallah, Kion and Urtasun, Raquel},
    title     = {Efficient Equivariant Transformer for Self-Driving Agent Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32164--32172}
}
Temporal Representation Enhancement (TRE): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features: Wei Liu,

Li Yang,

Yufei Wang,

Han Xiao,

Boyu Cai,

Weiming Hu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Wei and Yang, Li and Wang, Yufei and Xiao, Han and Cai, Boyu and Hu, Weiming},
    title     = {Temporal Representation Enhancement ({TRE}): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34480--34490}
}
Paper2Figure: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper: Siwei Han,

Haonian Ji,

Siyang Xin,

Juanquan Shi,

Shi Qiu,

Xinyu Ye,

Peng Xia,

Jiaqi Liu,

Zhaorun Chen,

Yiyang Zhou,

Linjie Li,

Lijuan Wang,

Huaxiu Yao; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Siwei and Ji, Haonian and Xin, Siyang and Shi, Juanquan and Qiu, Shi and Ye, Xinyu and Xia, Peng and Liu, Jiaqi and Chen, Zhaorun and Zhou, Yiyang and Li, Linjie and Wang, Lijuan and Yao, Huaxiu},
    title     = {{Paper2Figure}: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29157--29166}
}
OctoT2I: A Self-Evolving Agentic Text-to-Image Router: Xu Jiang,

Bin Chen,

Gehui Li,

Yule Duan,

Ronggang Wang,

Jian Zhang; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Xu and Chen, Bin and Li, Gehui and Duan, Yule and Wang, Ronggang and Zhang, Jian},
    title     = {{OctoT2I}: A Self-Evolving Agentic Text-to-Image Router},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31628--31638}
}
Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition: Yuzhuang Yang,

Xiaolin Tian,

Qigong Sun; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuzhuang and Tian, Xiaolin and Sun, Qigong},
    title     = {Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32389--32398}
}
YOSE: You Only Select Essential Tokens for Efficient DiT-based Video Object Removal: Chenyang Wu,

Lina Lei,

Fan Li,

Chunle Guo,

Dehong Kong,

Xinran Qin,

Zhixin Wang,

Mingming Cheng,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Chenyang and Lei, Lina and Li, Fan and Guo, Chunle and Kong, Dehong and Qin, Xinran and Wang, Zhixin and Cheng, Mingming and Li, Chongyi},
    title     = {{YOSE}: You Only Select Essential Tokens for Efficient {DiT}-based Video Object Removal},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32926--32935}
}
Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities: Sha Tao,

Jiao Pan,

Yu Guo,

Chao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
    author    = {Tao, Sha and Pan, Jiao and Guo, Yu and Yao, Chao},
    title     = {Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37528--37537}
}
MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator: Peiqing Yang,

Shangchen Zhou,

Kai Hao,

Qingyi Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Peiqing and Zhou, Shangchen and Hao, Kai and Tao, Qingyi},
    title     = {{MatAnyone} 2: Scaling Video Matting via a Learned Quality Evaluator},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37476--37485}
}
Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer: Yue Lei,

Siqi Yang,

Ting Zhong,

Fan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2026_CVPR,
    author    = {Lei, Yue and Yang, Siqi and Zhong, Ting and Zhou, Fan},
    title     = {{Harmonic Canvas}: Inversion-Free Editing for Visually-Guided Music Style Transfer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29727--29737}
}
VideoCoF: Unified Video Editing with Temporal Reasoner: Xiangpeng Yang,

Ji Xie,

Yiyuan Yang,

Yue Ma,

Yan Huang,

Min Xu,

Qiang Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Xiangpeng and Xie, Ji and Yang, Yiyuan and Ma, Yue and Huang, Yan and Xu, Min and Wu, Qiang},
    title     = {{VideoCoF}: Unified Video Editing with Temporal Reasoner},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37940--37949}
}
Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis: Hongyuan Chen,

Xingyu Chen,

Zexiang Xu,

Anpei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Hongyuan and Chen, Xingyu and Xu, Zexiang and Chen, Anpei},
    title     = {Motion 3-to-4: {3D} Motion Reconstruction for {4D} Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28947--28958}
}
Generative Modeling of Weights: Generalization or Memorization?: Boya Zeng,

Yida Yin,

Zhiqiu Xu,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Boya and Yin, Yida and Xu, Zhiqiu and Liu, Zhuang},
    title     = {Generative Modeling of Weights: Generalization or Memorization?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41974--41984}
}
Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning: Shuai Yi,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2026_CVPR,
    author    = {Yi, Shuai and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
    title     = {Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29494--29503}
}
MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models: Ruoxiang Huang,

Zhen Yuan; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Ruoxiang and Yuan, Zhen},
    title     = {{MODIX}: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31534--31543}
}
CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection: Zhaonian Kuang,

Rui Ding,

Haotian Wang,

Xinhu Zheng,

Meng Yang,

Gang Hua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
    author    = {Kuang, Zhaonian and Ding, Rui and Wang, Haotian and Zheng, Xinhu and Yang, Meng and Hua, Gang},
    title     = {{CoIn3D}: Revisiting Configuration-Invariant Multi-Camera {3D} Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40707--40716}
}
StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation: Junlin Xie,

Quanlong Zheng,

Ruifei Zhang,

Kuo Wang,

Yanhao Zhang,

Jinguo Luo,

Haonan Lu,

Xiang Wan,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Junlin and Zheng, Quanlong and Zhang, Ruifei and Wang, Kuo and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Wan, Xiang and Li, Guanbin},
    title     = {{StreamRAG}: Enhancing Real-Time Video Understanding with Retrieval Augmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38870--38879}
}
Improving Vision-language Models with Perception-centric Process Reward Models: Yingqian Min,

Kun Zhou,

Yifan Li,

Yuhuan Wu,

Han Peng,

Yifan Du,

Wayne Xin Zhao,

Min Yang,

Ji-Rong Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Min_2026_CVPR,
    author    = {Min, Yingqian and Zhou, Kun and Li, Yifan and Wu, Yuhuan and Peng, Han and Du, Yifan and Zhao, Wayne Xin and Yang, Min and Wen, Ji-Rong},
    title     = {Improving Vision-language Models with Perception-centric Process Reward Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33099--33109}
}
Spatial Matters: Position-Guided 3D Referring Expression Segmentation: Yabing Wang,

Zhuotao Tian,

Le Wang,

Zheng Qin,

Sanping Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yabing and Tian, Zhuotao and Wang, Le and Qin, Zheng and Zhou, Sanping},
    title     = {Spatial Matters: Position-Guided {3D} Referring Expression Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39486--39496}
}
ReHyAt: Recurrent Hybrid Attention for Video Diffusion Transformers: Mohsen Ghafoorian,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghafoorian_2026_CVPR,
    author    = {Ghafoorian, Mohsen and Habibian, Amirhossein},
    title     = {{ReHyAt}: Recurrent Hybrid Attention for Video Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40674--40684}
}
MultiCrafter: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment: Tao Wu,

Yibo Jiang,

Yehao Lu,

Zhizhong Wang,

Zeyi Huang,

Zequn Qin,

Xi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Tao and Jiang, Yibo and Lu, Yehao and Wang, Zhizhong and Huang, Zeyi and Qin, Zequn and Li, Xi},
    title     = {{MultiCrafter}: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36691--36702}
}
Benchmarking PhD-Level Coding in 3D Geometric Computer Vision: Wenyi Li,

Renkai Luo,

Yue Yu,

Huan-ang Gao,

Mingju Gao,

Li Yuan,

Chaoyou Fu,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Wenyi and Luo, Renkai and Yu, Yue and Gao, Huan-ang and Gao, Mingju and Yuan, Li and Fu, Chaoyou and Zhao, Hao},
    title     = {Benchmarking {PhD}-Level Coding in {3D} Geometric Computer Vision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30974--30985}
}
HP-Edit: A Human-Preference Post-Training Framework for Image Editing: Fan Li,

Chonghuinan Wang,

Lina Lei,

Yuping Qiu,

Jiaqi Xu,

Jiaxiu Jiang,

Xinran Qin,

Zhikai Chen,

Fenglong Song,

Zhixin Wang,

Renjing Pei,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Fan and Wang, Chonghuinan and Lei, Lina and Qiu, Yuping and Xu, Jiaqi and Jiang, Jiaxiu and Qin, Xinran and Chen, Zhikai and Song, Fenglong and Wang, Zhixin and Pei, Renjing and Zuo, Wangmeng},
    title     = {{HP-Edit}: A Human-Preference Post-Training Framework for Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43113--43123}
}
SocialNav: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation: Ziyi Chen,

Yingnan Guo,

Zedong Chu,

Minghua Luo,

Yanfen Shen,

Mingchao Sun,

Junjun Hu,

Shichao Xie,

Yang Kuan,

Pei Shi,

Zhining Gu,

Lu Liu,

Honglin Han,

Xiaolong Wu,

Mu Xu,

Yu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Ziyi and Guo, Yingnan and Chu, Zedong and Luo, Minghua and Shen, Yanfen and Sun, Mingchao and Hu, Junjun and Xie, Shichao and Kuan, Yang and Shi, Pei and Gu, Zhining and Liu, Lu and Han, Honglin and Wu, Xiaolong and Xu, Mu and Zhang, Yu},
    title     = {{SocialNav}: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28796--28806}
}
MSRL: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning: Chenglong Wang,

Yifu Huo,

Yang Gan,

Qiaozhi He,

Qi Meng,

Bei Li,

Yan Wang,

Junfu Liu,

Tianhua Zhou,

Jingbo Zhu,

Tong Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenglong and Huo, Yifu and Gan, Yang and He, Qiaozhi and Meng, Qi and Li, Bei and Wang, Yan and Liu, Junfu and Zhou, Tianhua and Zhu, Jingbo and Xiao, Tong},
  title     = {{MSRL}: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29410--29420}
}
PixelRush: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion: Hong-Phuc Lai,

Phong Nguyen,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Hong-Phuc and Nguyen, Phong and Tran, Anh},
  title     = {{PixelRush}: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35946--35955}
}
Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens: Xinxuan Lu,

Charless Fowlkes,

Alexander C. Berg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xinxuan and Fowlkes, Charless and Berg, Alexander C.},
  title     = {Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29222--29232}
}
It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models: Anne Harrington,

A. Sophia Koepke,

Shyamgopal Karthik,

Trevor Darrell,

Alexei A. Efros; [pdf] [supp]
[bibtex]
@InProceedings{Harrington_2026_CVPR,
  author    = {Harrington, Anne and Koepke, A. Sophia and Karthik, Shyamgopal and Darrell, Trevor and Efros, Alexei A.},
  title     = {It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43124--43134}
}
CamDirector: Towards Long-Term Coherent Video Trajectory Editing: Kejia Yin,

Zhihao Shi,

Weilin Wan,

Yuhongze Zhou,

Yuanhao Yu,

Xinxin Zuo,

Qiang Sun,

Juwei Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Kejia and Shi, Zhihao and Wan, Weilin and Zhou, Yuhongze and Yu, Yuanhao and Zuo, Xinxin and Sun, Qiang and Lu, Juwei},
  title     = {{CamDirector}: Towards Long-Term Coherent Video Trajectory Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32683--32692}
}
SpiderCam: Low-Power Snapshot Depth from Differential Defocus: Marcos A. Ferreira,

Tianao Li,

John Mamish,

Josiah Hester,

Yaman Sangar,

Qi Guo,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ferreira_2026_CVPR,
  author    = {Ferreira, Marcos A. and Li, Tianao and Mamish, John and Hester, Josiah and Sangar, Yaman and Guo, Qi and Alexander, Emma},
  title     = {{SpiderCam}: Low-Power Snapshot Depth from Differential Defocus},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41699--41709}
}
Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation: Johannes Schusterbauer,

Ming Gui,

Yusong Li,

Pingchuan Ma,

Felix Krause,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schusterbauer_2026_CVPR,
  author    = {Schusterbauer, Johannes and Gui, Ming and Li, Yusong and Ma, Pingchuan and Krause, Felix and Ommer, Bj{\"o}rn},
  title     = {Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43260--43270}
}
Towards Human-Like Robot Handwriting via Contour-Aware Generation: Yutao Qin,

Gang Dai,

Yifan Zhang,

Youwei Han,

Qisheng He,

Shuangping Huang; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yutao and Dai, Gang and Zhang, Yifan and Han, Youwei and He, Qisheng and Huang, Shuangping},
  title     = {Towards Human-Like Robot Handwriting via Contour-Aware Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31597--31607}
}
UniT: Unified Multimodal Chain-of-Thought Test-time Scaling: Leon Liangyu Chen,

Haoyu Ma,

Zhipeng Fan,

Ziqi Huang,

Animesh Sinha,

Xiaoliang Dai,

Jialiang Wang,

Zecheng He,

Jianwei Yang,

Chunyuan Li,

Junzhe Sun,

Chu Wang,

Serena Yeung-Levy,

Felix Juefei-Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Leon Liangyu and Ma, Haoyu and Fan, Zhipeng and Huang, Ziqi and Sinha, Animesh and Dai, Xiaoliang and Wang, Jialiang and He, Zecheng and Yang, Jianwei and Li, Chunyuan and Sun, Junzhe and Wang, Chu and Yeung-Levy, Serena and Juefei-Xu, Felix},
  title     = {{UniT}: Unified Multimodal Chain-of-Thought Test-time Scaling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30257--30267}
}
VoxTell: Free-Text Promptable Universal 3D Medical Image Segmentation: Maximilian Rokuss,

Moritz Langenberg,

Yannick Kirchhoff,

Fabian Isensee,

Benjamin Hamm,

Constantin Ulrich,

Sebastian Regnery,

Lukas Bauer,

Efthimios Katsigiannopulos,

Tobias Norajitra,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rokuss_2026_CVPR,
  author    = {Rokuss, Maximilian and Langenberg, Moritz and Kirchhoff, Yannick and Isensee, Fabian and Hamm, Benjamin and Ulrich, Constantin and Regnery, Sebastian and Bauer, Lukas and Katsigiannopulos, Efthimios and Norajitra, Tobias and Maier-Hein, Klaus},
  title     = {{VoxTell}: Free-Text Promptable Universal {3D} Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37538--37557}
}
VGent: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction: Weitai Kang,

Jason Kuen,

Mengwei Ren,

Zijun Wei,

Yan Yan,

Kangning Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Weitai and Kuen, Jason and Ren, Mengwei and Wei, Zijun and Yan, Yan and Liu, Kangning},
  title     = {{VGent}: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41160--41170}
}
Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment: Takayuki Hara,

Yuya Otsuka; [pdf] [supp]
[bibtex]
@InProceedings{Hara_2026_CVPR,
  author    = {Hara, Takayuki and Otsuka, Yuya},
  title     = {Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37236--37246}
}
SMAP: Semantic Route Planning with Map-Grounded Multimodal Alignment: Wenjie Zhang,

Chen Yang,

Xin Lu,

Zhen Wang,

Yue Liu,

Bobo Xi,

Pengbo Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenjie and Yang, Chen and Lu, Xin and Wang, Zhen and Liu, Yue and Xi, Bobo and Zhang, Pengbo},
  title     = {{SMAP}: Semantic Route Planning with Map-Grounded Multimodal Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40108--40118}
}
ActivityForensics: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos: Peijun Bao,

Anwei Luo,

Gang Pan,

Alex C. Kot,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Peijun and Luo, Anwei and Pan, Gang and Kot, Alex C. and Jiang, Xudong},
  title     = {{ActivityForensics}: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42987--42996}
}
Mitigating The Distribution Shift of Diffusion-based Dataset Distillation: Yue Xu,

Chenyu Hu,

Pengyu An,

Yong-Lu Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yue and Hu, Chenyu and An, Pengyu and Li, Yong-Lu},
  title     = {Mitigating The Distribution Shift of Diffusion-based Dataset Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33943--33952}
}
Generative Video Compression with One-Dimensional Latent Representation: Zihan Zheng,

Zhaoyang Jia,

Naifu Xue,

Jiahao Li,

Bin Li,

Zongyu Guo,

Xiaoyi Zhang,

Zhenghao Chen,

Houqiang Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zihan and Jia, Zhaoyang and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Chen, Zhenghao and Li, Houqiang and Lu, Yan},
  title     = {Generative Video Compression with One-Dimensional Latent Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41256--41265}
}
ExPose: Reinforcing Video Generation Models for Extreme Pose Estimation: Youngho Yoon,

Wonjune Cho,

Hyunho Ha,

Sujung Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Youngho and Cho, Wonjune and Ha, Hyunho and Kim, Sujung and Yoon, Kuk-Jin},
  title     = {{ExPose}: Reinforcing Video Generation Models for Extreme Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32636--32646}
}
Multi-SpatialMLLM: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models: Runsen Xu,

Weiyao Wang,

Hao Tang,

Xingyu Chen,

Xiaodong Wang,

Fu-Jen Chu,

Matt Feiszli,

Kevin J. Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Runsen and Wang, Weiyao and Tang, Hao and Chen, Xingyu and Wang, Xiaodong and Chu, Fu-Jen and Feiszli, Matt and Liang, Kevin J.},
  title     = {{Multi-SpatialMLLM}: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31078--31088}
}
DocSeeker: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding: Hao Yan,

Yuliang Liu,

Xingchen Liu,

Yuyi Zhang,

Minghui Liao,

Jihao Wu,

Wei Chen,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Hao and Liu, Yuliang and Liu, Xingchen and Zhang, Yuyi and Liao, Minghui and Wu, Jihao and Chen, Wei and Bai, Xiang},
  title     = {{DocSeeker}: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41140--41149}
}
DriverGaze360: OmniDirectional Driver Attention with Object-Level Guidance: Shreedhar Govil,

Didier Stricker,

Jason Rambach; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Govil_2026_CVPR,
  author    = {Govil, Shreedhar and Stricker, Didier and Rambach, Jason},
  title     = {{DriverGaze360}: {OmniDirectional} Driver Attention with Object-Level Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39786--39795}
}
FlashIn: Fast and Accurate Image Inversion for Real-time Image Editing: Guangzhi Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guangzhi},
  title     = {{FlashIn}: Fast and Accurate Image Inversion for Real-time Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30425--30434}
}
Gloria: Consistent Character Video Generation via Content Anchors: Yuhang Yang,

Fan Zhang,

Huaijin Pi,

Ailing Zeng,

Shuai Guo,

Guowei Xu,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuhang and Zhang, Fan and Pi, Huaijin and Zeng, Ailing and Guo, Shuai and Xu, Guowei and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {Gloria: Consistent Character Video Generation via Content Anchors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36724--36735}
}
Action-Geometry Prediction with 3D Geometric Prior for Bimanual Manipulation: Chongyang Xu,

Haipeng Li,

Shen Cheng,

Haoqiang Fan,

Ziliang Feng,

Shuaicheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chongyang and Li, Haipeng and Cheng, Shen and Fan, Haoqiang and Feng, Ziliang and Liu, Shuaicheng},
  title     = {Action-Geometry Prediction with {3D} Geometric Prior for Bimanual Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35036--35046}
}
Revisiting 2D Foundation Models for Scalable 3D Medical Image Classification: Han Liu,

Bogdan Georgescu,

Yanbo Zhang,

Youngjin Yoo,

Michael Baumgartner,

Riqiang Gao,

Jianing Wang,

Gengyan Zhao,

Eli Gibson,

Dorin Comaniciu,

Sasa Grbic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Han and Georgescu, Bogdan and Zhang, Yanbo and Yoo, Youngjin and Baumgartner, Michael and Gao, Riqiang and Wang, Jianing and Zhao, Gengyan and Gibson, Eli and Comaniciu, Dorin and Grbic, Sasa},
  title     = {Revisiting {2D} Foundation Models for Scalable {3D} Medical Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30021--30031}
}
UniGen-1.5: Enhancing Image Generation and Editing through Reward Unification in RL: Rui Tian,

Mingfei Gao,

Haiming Gang,

Jiasen Lu,

Zhe Gan,

Yinfei Yang,

Zuxuan Wu,

Afshin Dehghan; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Rui and Gao, Mingfei and Gang, Haiming and Lu, Jiasen and Gan, Zhe and Yang, Yinfei and Wu, Zuxuan and Dehghan, Afshin},
  title     = {{UniGen-1.5}: Enhancing Image Generation and Editing through Reward Unification in {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29367--29378}
}
Leveraging Verifier-Based Reinforcement Learning in Image Editing: Hanzhong Guo,

Jie Wu,

Jie Liu,

Yu Gao,

Zilyu Ye,

Linxiao Yuan,

Xionghui Wang,

Yizhou Yu,

Weilin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhong and Wu, Jie and Liu, Jie and Gao, Yu and Ye, Zilyu and Yuan, Linxiao and Wang, Xionghui and Yu, Yizhou and Huang, Weilin},
  title     = {Leveraging Verifier-Based Reinforcement Learning in Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34343--34352}
}
Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering: Taichun Zhou,

Zhibin Dong,

Hao Tan,

Siwei Wang,

Xinwang Liu,

En Zhu,

Di Hu,

Tianrui Liu,

Chuankun Li,

Kunlun He; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Taichun and Dong, Zhibin and Tan, Hao and Wang, Siwei and Liu, Xinwang and Zhu, En and Hu, Di and Liu, Tianrui and Li, Chuankun and He, Kunlun},
  title     = {Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39606--39616}
}
InnoAds-Composer: Efficient Condition Composition for E-Commerce Poster Generation: Yuxin Qin,

Ke Cao,

Haowei Liu,

Ao Ma,

Fengheng Li,

Honghe Zhu,

Zheng Zhang,

Run Ling,

Wei Feng,

Xuanhua He,

Zhanjie Zhang,

Zhen Guo,

Haoyi Bian,

Jingjing Lv,

Junjie Shen,

Ching Law; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yuxin and Cao, Ke and Liu, Haowei and Ma, Ao and Li, Fengheng and Zhu, Honghe and Zhang, Zheng and Ling, Run and Feng, Wei and He, Xuanhua and Zhang, Zhanjie and Guo, Zhen and Bian, Haoyi and Lv, Jingjing and Shen, Junjie and Law, Ching},
  title     = {{InnoAds-Composer}: Efficient Condition Composition for E-Commerce Poster Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32988--32999}
}
Progressive Multi-cue Alignment for Unaligned RGBT Tracking: Jiandong Jin,

Chenglong Li,

Hao Feng,

Andong Lu,

Lili Huang,

Jin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Jiandong and Li, Chenglong and Feng, Hao and Lu, Andong and Huang, Lili and Tang, Jin},
  title     = {Progressive Multi-cue Alignment for Unaligned {RGBT} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35207--35216}
}
DynamicVGGT: Learning Dynamic Point Maps for 4D Scene Reconstruction in Autonomous Driving: Zhuolin He,

Jing Li,

Guanghao Li,

Xiaolei Chen,

Jiacheng Tang,

Siyang Zhang,

Zhounan Jin,

Feipeng Cai,

Bin Li,

Jian Pu,

Jia Cai,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Zhuolin and Li, Jing and Li, Guanghao and Chen, Xiaolei and Tang, Jiacheng and Zhang, Siyang and Jin, Zhounan and Cai, Feipeng and Li, Bin and Pu, Jian and Cai, Jia and Xue, Xiangyang},
  title     = {{DynamicVGGT}: Learning Dynamic Point Maps for {4D} Scene Reconstruction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35670--35679}
}
Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion: Yueming Pan,

Ruoyu Feng,

Qi Dai,

Yuqi Wang,

Wenfeng Lin,

Mingyu Guo,

Chong Luo,

Nanning Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Yueming and Feng, Ruoyu and Dai, Qi and Wang, Yuqi and Lin, Wenfeng and Guo, Mingyu and Luo, Chong and Zheng, Nanning},
  title     = {Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43664--43674}
}
MPL: Match-guided Prototype Learning for Few-shot Action Recognition: Feng Yang,

Jie Zhao,

Fulin Luo,

Anyong Qin,

Tiecheng Song,

Yue Zhao,

Chenqiang Gao,

Junwei Han; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Feng and Zhao, Jie and Luo, Fulin and Qin, Anyong and Song, Tiecheng and Zhao, Yue and Gao, Chenqiang and Han, Junwei},
  title     = {{MPL}: Match-guided Prototype Learning for Few-shot Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34439--34448}
}
LUMINA: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol: Hongyi Pan,

Gorkem Durak,

Halil Ertugrul Aktas,

Andrea M. Bejar,

Baver Tutun,

Emre Uysal,

Ezgi Bulbul,

Mehmet Fatih Dogan,

Berrin Erok,

Berna Akkus Yildirim,

Sukru Mehmet Erturk,

Ulas Bagci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Hongyi and Durak, Gorkem and Aktas, Halil Ertugrul and Bejar, Andrea M. and Tutun, Baver and Uysal, Emre and Bulbul, Ezgi and Dogan, Mehmet Fatih and Erok, Berrin and Yildirim, Berna Akkus and Erturk, Sukru Mehmet and Bagci, Ulas},
  title     = {{LUMINA}: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35301--35310}
}
VerseCrafter: Dynamic Realistic Video World Model with 4D Geometric Control: Sixiao Zheng,

Minghao Yin,

Wenbo Hu,

Xiaoyu Li,

Ying Shan,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Sixiao and Yin, Minghao and Hu, Wenbo and Li, Xiaoyu and Shan, Ying and Fu, Yanwei},
  title     = {{VerseCrafter}: Dynamic Realistic Video World Model with {4D} Geometric Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40277--40290}
}
PhysSkin: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning: Yuanhang Lei,

Tao Cheng,

Xingxuan Li,

Boming Zhao,

Siyuan Huang,

Ruizhen Hu,

Peter Yichen Chen,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Yuanhang and Cheng, Tao and Li, Xingxuan and Zhao, Boming and Huang, Siyuan and Hu, Ruizhen and Chen, Peter Yichen and Bao, Hujun and Cui, Zhaopeng},
  title     = {{PhysSkin}: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32357--32366}
}
Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency: Hakan Emre Gedik,

Shashank Gupta,

Alan Bovik; [pdf] [supp]
[bibtex]
@InProceedings{Gedik_2026_CVPR,
  author    = {Gedik, Hakan Emre and Gupta, Shashank and Bovik, Alan},
  title     = {Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37507--37517}
}
LRHDR: Learning Representation-enhanced HDR Video Reconstruction: Chenzhuo Liao,

Xin Chen,

Bingchen Li,

Yu Meng,

Tao Yue,

Xuemei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Chenzhuo and Chen, Xin and Li, Bingchen and Meng, Yu and Yue, Tao and Hu, Xuemei},
  title     = {{LRHDR}: Learning Representation-enhanced {HDR} Video Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41584--41593}
}
Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing: Abdullah Azeem,

Ruisheng Wang,

Qingquan Li,

Abubakar Siddique; [pdf] [supp]
[bibtex]
@InProceedings{Azeem_2026_CVPR,
  author    = {Azeem, Abdullah and Wang, Ruisheng and Li, Qingquan and Siddique, Abubakar},
  title     = {Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34662--34672}
}
Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training: Xiangyang Luo,

Qingyu Li,

Yuming Li,

Guanbo Huang,

Yongjie Zhu,

Wenyu Qin,

Meng Wang,

Pengfei Wan,

Shao-Lun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Xiangyang and Li, Qingyu and Li, Yuming and Huang, Guanbo and Zhu, Yongjie and Qin, Wenyu and Wang, Meng and Wan, Pengfei and Huang, Shao-Lun},
  title     = {Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43440--43449}
}
Diffusion Probe: Generated Image Result Prediction Using CNN Probes: Bukun Huang,

Benlei Cui,

Zhizeng Ye,

Xuemei Dong,

Tuo Chen,

Hui Xue,

Dingkang Yang,

Longtao Huang,

Haiwen Hong,

Jingqun Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Bukun and Cui, Benlei and Ye, Zhizeng and Dong, Xuemei and Chen, Tuo and Xue, Hui and Yang, Dingkang and Huang, Longtao and Hong, Haiwen and Tang, Jingqun},
  title     = {Diffusion Probe: Generated Image Result Prediction Using {CNN} Probes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35926--35935}
}
Cross-modal Representation Learning for Diffusion-generated Image Detection: Tao Gong,

Dayong Wang,

Qi Chu,

Bin Liu,

Nenghai Yu; [pdf]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Tao and Wang, Dayong and Chu, Qi and Liu, Bin and Yu, Nenghai},
  title     = {Cross-modal Representation Learning for Diffusion-generated Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36092--36102}
}
Describe Anything Anywhere At Any Moment: Nicolas Gorlo,

Lukas Schmid,

Luca Carlone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gorlo_2026_CVPR,
  author    = {Gorlo, Nicolas and Schmid, Lukas and Carlone, Luca},
  title     = {Describe Anything Anywhere At Any Moment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35002--35013}
}
Mining Attribute Subspaces for Efficient Fine-tuning of 3D Foundation Models: Yu Jiang,

Hanwen Jiang,

Ahmed Abdelkader,

Wen-Sheng Chu,

Brandon Feng,

Zhangyang Wang,

Qixing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yu and Jiang, Hanwen and Abdelkader, Ahmed and Chu, Wen-Sheng and Feng, Brandon and Wang, Zhangyang and Huang, Qixing},
  title     = {Mining Attribute Subspaces for Efficient Fine-tuning of {3D} Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29071--29080}
}
Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation: Hongwei Fang,

Jiahang Cai,

Xun Wang,

Wenwu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Hongwei and Cai, Jiahang and Wang, Xun and Yang, Wenwu},
  title     = {Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42891--42900}
}
Electromagnetic Inverse Scattering from a Single Transmitter: Yizhe Cheng,

Chunxun Tian,

Haoru Wang,

Wentao Zhu,

Xiaoxuan Ma,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yizhe and Tian, Chunxun and Wang, Haoru and Zhu, Wentao and Ma, Xiaoxuan and Wang, Yizhou},
  title     = {Electromagnetic Inverse Scattering from a Single Transmitter},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34040--34049}
}
Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters: Dongxin Xie,

Yan Huang,

Yong Xu,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Dongxin and Huang, Yan and Xu, Yong and Ji, Hui},
  title     = {Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29919--29928}
}
Exploring the Underwater World Segmentation without Extra Training: Bingyu Li,

Tao Huo,

Da Zhang,

Zhiyuan Zhao,

Junyu Gao,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingyu and Huo, Tao and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong},
  title     = {Exploring the Underwater World Segmentation without Extra Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39879--39889}
}
HOLO: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on SD Maps: Xuchang Zhong,

Xu Cao,

Jinke Feng,

Hao Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Xuchang and Cao, Xu and Feng, Jinke and Fang, Hao},
  title     = {{HOLO}: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on {SD} Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41376--41385}
}
OVOD-Agent: A Markov-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection: Chujie Wang,

Jianyu Lu,

Zhiyuan Luo,

Xi Chen,

Chu He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chujie and Lu, Jianyu and Luo, Zhiyuan and Chen, Xi and He, Chu},
  title     = {{OVOD-Agent}: A {Markov}-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41416--41425}
}
CAST: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval: Xuanzuo Lin,

Min Zhang,

Daizong Liu,

Zhiwen Zuo,

Xun Yang,

Changting Lin,

Xun Wang,

Jianfeng Dong; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xuanzuo and Zhang, Min and Liu, Daizong and Zuo, Zhiwen and Yang, Xun and Lin, Changting and Wang, Xun and Dong, Jianfeng},
  title     = {{CAST}: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38794--38803}
}
EE-RL: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving: Xiaolong Li,

Lan Yang,

Ruyang Li,

Shan Fang,

Yang Liu,

Xiangmo Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaolong and Yang, Lan and Li, Ruyang and Fang, Shan and Liu, Yang and Zhao, Xiangmo},
  title     = {{EE-RL}: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32082--32092}
}
Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning: Bozhao Li,

Shaocong Wu,

Tong Shao,

Senqiao Yang,

Qiben Shan,

Zhuotao Tian,

Jingyong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bozhao and Wu, Shaocong and Shao, Tong and Yang, Senqiao and Shan, Qiben and Tian, Zhuotao and Su, Jingyong},
  title     = {Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34617--34627}
}
NeuROK: Generative 4D Neural Object Kinematics: Chen Geng,

Guangzhao He,

Yue Gao,

Yunzhi Zhang,

Shangzhe Wu,

Jiajun Wu; [pdf]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Chen and He, Guangzhao and Gao, Yue and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun},
  title     = {{NeuROK}: Generative {4D} Neural Object Kinematics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39239--39251}
}
Streaming Video Instruction Tuning: Jiaer Xia,

Peixian Chen,

Mengdan Zhang,

Xing Sun,

Kaiyang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Jiaer and Chen, Peixian and Zhang, Mengdan and Sun, Xing and Zhou, Kaiyang}, title = {Streaming Video Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31219-31229} }
Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification: Jiangling Zhang,

Shuxuan Gao,

Bofan Liu,

Siqiang Feng,

Jirui Huang,

Yaxiong Chen,

Ziyu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiangling and Gao, Shuxuan and Liu, Bofan and Feng, Siqiang and Huang, Jirui and Chen, Yaxiong and Chen, Ziyu}, title = {Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35494-35503} }
OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation: Han Li,

Xinyu Peng,

Yaoming Wang,

Zelin Peng,

Xin Chen,

Rongxiang Weng,

Jingang Wang,

Xunliang Cai,

Wenrui Dai,

Hongkai Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Han and Peng, Xinyu and Wang, Yaoming and Peng, Zelin and Chen, Xin and Weng, Rongxiang and Wang, Jingang and Cai, Xunliang and Dai, Wenrui and Xiong, Hongkai}, title = {OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30235-30245} }
A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks: Tangzheng Lian,

Guanyu Hu,

Yijing Ren,

Dimitrios Kollias,

Oya Celiktutan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR, author = {Lian, Tangzheng and Hu, Guanyu and Ren, Yijing and Kollias, Dimitrios and Celiktutan, Oya}, title = {A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31672-31682} }
Vista4D: Video Reshooting with 4D Point Clouds: Kuan Heng Lin,

Zhizheng Liu,

Pablo Salamanca,

Yash Kant,

Ryan Burgert,

Yuancheng Xu,

Koichi Namekata,

Yiwei Zhao,

Bolei Zhou,

Micah Goldblum,

Paul Debevec,

Ning Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Kuan Heng and Liu, Zhizheng and Salamanca, Pablo and Kant, Yash and Burgert, Ryan and Xu, Yuancheng and Namekata, Koichi and Zhao, Yiwei and Zhou, Bolei and Goldblum, Micah and Debevec, Paul and Yu, Ning}, title = {Vista4D: Video Reshooting with 4D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32671-32682} }
LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation: Yuwei Ning,

Ganlong Zhao,

Yipeng Qin,

Si Liu,

Yang Liu,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ning_2026_CVPR, author = {Ning, Yuwei and Zhao, Ganlong and Qin, Yipeng and Liu, Si and Liu, Yang and Lin, Liang and Li, Guanbin}, title = {LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32441-32450} }
Tracking through Severe Occlusion via Event-Derived Transient Cues: Hao Dong,

Yujin Liu,

Haoyue Liu,

Zhenyu Wang,

Shihan Peng,

Zhiwei Shi,

Yi Chang,

Luxin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Hao and Liu, Yujin and Liu, Haoyue and Wang, Zhenyu and Peng, Shihan and Shi, Zhiwei and Chang, Yi and Yan, Luxin}, title = {Tracking through Severe Occlusion via Event-Derived Transient Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29526-29536} }
FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts: You Li,

Dewei Zhou,

Fan Ma,

Fu Li,

Dongliang He,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, You and Zhou, Dewei and Ma, Fan and Li, Fu and He, Dongliang and Yang, Yi}, title = {FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29254-29264} }
Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation: Jiahao Xu,

Xiaohan Yuan,

Xingchen Wu,

Chongyang Xu,

Kun Li,

Buzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Jiahao and Yuan, Xiaohan and Wu, Xingchen and Xu, Chongyang and Li, Kun and Huang, Buzhen}, title = {Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38355-38365} }
Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection: Jinhu Fu,

Yihang Lou,

Qingyi Si,

Shudong Zhang,

Sen Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR, author = {Fu, Jinhu and Lou, Yihang and Si, Qingyi and Zhang, Shudong and Su, Sen}, title = {Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31693-31702} }
Hierarchically Robust Zero-shot Vision-language Models: Junhao Dong,

Yifei Zhang,

Hao Zhu,

Yew-Soon Ong,

Piotr Koniusz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Junhao and Zhang, Yifei and Zhu, Hao and Ong, Yew-Soon and Koniusz, Piotr}, title = {Hierarchically Robust Zero-shot Vision-language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37642-37652} }
CLEP: Contrastive Language-Pose Pretraining: Sen Jia,

Huayu Wang,

Hsiang-Wei Huang,

Zhaochong An,

Jenq-Neng Hwang,

Huaping Zhang,

Lei Li; [pdf]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Sen and Wang, Huayu and Huang, Hsiang-Wei and An, Zhaochong and Hwang, Jenq-Neng and Zhang, Huaping and Li, Lei}, title = {CLEP: Contrastive Language-Pose Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30696-30706} }
AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models: Hongyi Cai,

Mohammad Mahdinur Rahman,

MingKang Dong,

Muxin Pu,

Moayad Aloqaily,

Jie Li,

Xinfeng Li,

Jialie Shen,

Meikang Qiu,

Qingsong Wen; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR, author = {Cai, Hongyi and Rahman, Mohammad Mahdinur and Dong, MingKang and Pu, Muxin and Aloqaily, Moayad and Li, Jie and Li, Xinfeng and Shen, Jialie and Qiu, Meikang and Wen, Qingsong}, title = {AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29285-29294} }
Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation: Yi Fan,

Yu-Bin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Yi and Yang, Yu-Bin}, title = {Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41985-41995} }
NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting: Brent Zoomers,

Florian Hahlbohm,

Joni Vanherck,

Lode Jorissen,

Marcus Magnor,

Nick Michiels; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zoomers_2026_CVPR, author = {Zoomers, Brent and Hahlbohm, Florian and Vanherck, Joni and Jorissen, Lode and Magnor, Marcus and Michiels, Nick}, title = {NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29824-29833} }
CADC: Content Adaptive Diffusion-Based Generative Image Compression: Xihua Sheng,

Lingyu Zhu,

Tianyu Zhang,

Dong Liu,

Shiqi Wang,

Jing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR, author = {Sheng, Xihua and Zhu, Lingyu and Zhang, Tianyu and Liu, Dong and Wang, Shiqi and Wang, Jing}, title = {CADC: Content Adaptive Diffusion-Based Generative Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32936-32946} }
ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers: Qinghao Zhong,

Bingzhi Chen,

Yishu Liu,

Minhua Lu,

Guangming Lu; [pdf]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Qinghao and Chen, Bingzhi and Liu, Yishu and Lu, Minhua and Lu, Guangming}, title = {ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37674-37683} }
Hyperbolic Gramian Volumes for Multimodal Alignment: Saiyang Na,

Feng Jiang,

Qifeng Zhou,

Wenliang Zhong,

Thao M. Dang,

Yuzhi Guo,

Hehuan Ma,

Chunyuan Li,

Weizhi An,

Junzhou Huang; [pdf] [supp]
[bibtex]
@InProceedings{Na_2026_CVPR, author = {Na, Saiyang and Jiang, Feng and Zhou, Qifeng and Zhong, Wenliang and Dang, Thao M. and Guo, Yuzhi and Ma, Hehuan and Li, Chunyuan and An, Weizhi and Huang, Junzhou}, title = {Hyperbolic Gramian Volumes for Multimodal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37756-37765} }
ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments: Jiaxin Ai,

Yukang Feng,

Fanrui Zhang,

Jianwen Sun,

Zizhen Li,

Chuanhao Li,

Yifan Chang,

Wenxiao Wu,

Ruoxi Wang,

Mingliang Zhai,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ai_2026_CVPR, author = {Ai, Jiaxin and Feng, Yukang and Zhang, Fanrui and Sun, Jianwen and Li, Zizhen and Li, Chuanhao and Chang, Yifan and Wu, Wenxiao and Wang, Ruoxi and Zhai, Mingliang and Zhang, Kaipeng}, title = {ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34586-34595} }
DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs: Yanbin Wei,

Jiangyue Yan,

Chun Kang,

Yang Chen,

Hua Liu,

James Kwok,

Yu Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Yanbin and Yan, Jiangyue and Kang, Chun and Chen, Yang and Liu, Hua and Kwok, James and Zhang, Yu}, title = {DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40822-40832} }
BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer: Changzhou Han,

Wanlun Ma,

Xi Tang,

Kun Hu,

Sheng Wen,

Yang Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Changzhou and Ma, Wanlun and Tang, Xi and Hu, Kun and Wen, Sheng and Xiang, Yang}, title = {BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28828-28837} }
Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs: Pei An,

Junfeng Ding,

Jiaqi Yang,

Yulong Wang,

Jie Ma,

Liangliang Nan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR, author = {An, Pei and Ding, Junfeng and Yang, Jiaqi and Wang, Yulong and Ma, Jie and Nan, Liangliang}, title = {Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39042-39051} }
UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying: Chengyu Bai,

Jintao Chen,

Xiang Bai,

Yilong Chen,

Qi She,

Ming Lu,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR, author = {Bai, Chengyu and Chen, Jintao and Bai, Xiang and Chen, Yilong and She, Qi and Lu, Ming and Zhang, Shanghang}, title = {UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29750-29759} }
ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models: Youngeun Kim,

Youjia Zhang,

Huiling Liu,

Aecheon Jung,

Sunwoo Lee,

Sungeun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Youngeun and Zhang, Youjia and Liu, Huiling and Jung, Aecheon and Lee, Sunwoo and Hong, Sungeun}, title = {ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39572-39582} }
Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics: Najibul Haque Sarker,

Zaber Ibn Abdul Hakim,

Ali Asgarov,

Chia-Wei Tang,

Alvi Md Ishmam,

Chris Thomas; [pdf] [supp]
[bibtex]
@InProceedings{Sarker_2026_CVPR, author = {Sarker, Najibul Haque and Ibn Abdul Hakim, Zaber and Asgarov, Ali and Tang, Chia-Wei and Ishmam, Alvi Md and Thomas, Chris}, title = {Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34940-34949} }
Chain-of-Thought Guided Multi-Modal Object Re-Identification: Ya Gao,

Shihao Li,

Zhaojun Liu,

Aihua Zheng,

Chenglong Li,

Jin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Ya and Li, Shihao and Liu, Zhaojun and Zheng, Aihua and Li, Chenglong and Tang, Jin}, title = {Chain-of-Thought Guided Multi-Modal Object Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37705-37714} }
MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation: Yinuo Wang,

Yanbo Fan,

Xuan Wang,

Boyao Zhou,

Yu Guo,

Yujun Shen,

Fei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Zhou, Boyao and Guo, Yu and Shen, Yujun and Wang, Fei}, title = {MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32399-32409} }
From Rays to Projections: Better Inputs for Feed-Forward View Synthesis: Zirui Wu,

Zeren Jiang,

Martin R. Oswald,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Zirui and Jiang, Zeren and Oswald, Martin R. and Song, Jie}, title = {From Rays to Projections: Better Inputs for Feed-Forward View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29012-29022} }
Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences: Ziyi Gao,

Zhipeng Wei,

Jingjing Chen,

Zhiyu Tan,

Hao Li,

Yi-Ping Phoebe Chen; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Ziyi and Wei, Zhipeng and Chen, Jingjing and Tan, Zhiyu and Li, Hao and Chen, Yi-Ping Phoebe}, title = {Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29244-29253} }
PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation: Jiacheng Hao,

Chunying Liu,

Hao Guo,

Ruohan Wang,

Hongping Gan,

Yilei Shi; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR, author = {Hao, Jiacheng and Liu, Chunying and Guo, Hao and Wang, Ruohan and Gan, Hongping and Shi, Yilei}, title = {PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41616-41625} }
Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning: Rui Zhao,

Bin Shi,

Kai Sun,

Bo Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Rui and Shi, Bin and Sun, Kai and Dong, Bo}, title = {Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39659-39668} }
STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection: Huijie Fan,

Pengrui Huang,

Qiang Wang,

Baojie Fan,

Jiahua Dong,

Liangqiong Qu; [pdf]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Huijie and Huang, Pengrui and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Qu, Liangqiong}, title = {STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33068-33077} }
VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents: George Eskandar,

Fengyi Shen,

Mohammad Altillawi,

Dong Chen,

Yang Bai,

Liudi Yang,

Ziyuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Eskandar_2026_CVPR, author = {Eskandar, George and Shen, Fengyi and Altillawi, Mohammad and Chen, Dong and Bai, Yang and Yang, Liudi and Liu, Ziyuan}, title = {VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29620-29630} }
ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers: Yiyang Ma,

Feng Zhou,

Xuedan Yin,

Pu Cao,

Yonghao Dang,

Jianqin Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Yiyang and Zhou, Feng and Yin, Xuedan and Cao, Pu and Dang, Yonghao and Yin, Jianqin}, title = {ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40612-40621} }
REACH: Explicit Recovery Behavior for Diffusion Policies: Zundong Ke,

Junlin Chen,

Jiayi Zhu,

Kuanhao Xia,

Boyi Zhao,

Jiayuan Gu; [pdf] [supp]
[bibtex]
@InProceedings{Ke_2026_CVPR, author = {Ke, Zundong and Chen, Junlin and Zhu, Jiayi and Xia, Kuanhao and Zhao, Boyi and Gu, Jiayuan}, title = {REACH: Explicit Recovery Behavior for Diffusion Policies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38498-38508} }
Tracking by Predicting 3-D Gaussians Over Time: Tanish Baranwal,

Himanshu Gaurav Singh,

Jathushan Rajasegaran,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baranwal_2026_CVPR, author = {Baranwal, Tanish and Singh, Himanshu Gaurav and Rajasegaran, Jathushan and Malik, Jitendra}, title = {Tracking by Predicting 3-D Gaussians Over Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42527-42537} }
TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction: Yihui Li,

Chengxin Lv,

Zichen Tang,

Hongyu Yang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yihui and Lv, Chengxin and Tang, Zichen and Yang, Hongyu and Huang, Di}, title = {TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40886-40895} }
Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols: Xianchao Zeng,

Xinyu Zhou,

Youcheng Li,

Jiayou Shi,

Tianle Li,

Liangming Chen,

Lei Ren,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR, author = {Zeng, Xianchao and Zhou, Xinyu and Li, Youcheng and Shi, Jiayou and Li, Tianle and Chen, Liangming and Ren, Lei and Li, Yong-Lu}, title = {Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42386-42395} }
When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm: Ye Leng,

Junjie Chu,

Mingjie Li,

Chenhao Lin,

Chao Shen,

Michael Backes,

Yun Shen,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2026_CVPR, author = {Leng, Ye and Chu, Junjie and Li, Mingjie and Lin, Chenhao and Shen, Chao and Backes, Michael and Shen, Yun and Zhang, Yang}, title = {When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39372-39382} }
PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects: Yan Di,

Yuheng Li,

Yaoxing Wang,

Mengge Liu,

Shan Gao,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Di_2026_CVPR, author = {Di, Yan and Li, Yuheng and Wang, Yaoxing and Liu, Mengge and Gao, Shan and Ji, Xiangyang}, title = {PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30661-30674} }
Scaling View Synthesis Transformers: Evan Kim,

Hyunwoo Ryu,

Thomas W. Mitchel,

Vincent Sitzmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Evan and Ryu, Hyunwoo and Mitchel, Thomas W. and Sitzmann, Vincent}, title = {Scaling View Synthesis Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28893-28902} }
Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration: Jinglin Xu,

Yi Li,

Chuxiong Sun,

Xiao Xu,

Jiangmeng Li,

Fanjiang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Jinglin and Li, Yi and Sun, Chuxiong and Xu, Xiao and Li, Jiangmeng and Xu, Fanjiang}, title = {Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30268-30277} }
Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation: Jiahao Nie,

Guanqiao Fu,

Wenbin An,

Yap-Peng Tan,

Alex C. Kot,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR, author = {Nie, Jiahao and Fu, Guanqiao and An, Wenbin and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian}, title = {Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41594-41604} }
No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models: Hai X. Pham,

David T. Hoffmann,

Ricardo Guerrero,

Brais Martinez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2026_CVPR, author = {Pham, Hai X. and Hoffmann, David T. and Guerrero, Ricardo and Martinez, Brais}, title = {No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33901-33910} }
Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation: Xinyue Liu,

Jin Liu,

Hongbo Wang,

Ran He,

Huaibo Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Xinyue and Liu, Jin and Wang, Hongbo and He, Ran and Huang, Huaibo}, title = {Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34270-34280} }
Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization: Ligong Cao,

Yeting Guo,

Haoang Chi; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR, author = {Cao, Ligong and Guo, Yeting and Chi, Haoang}, title = {Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35384-35393} }
OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data: Bin Cao,

Sipeng Zheng,

Hao Luo,

Boyuan Li,

Jing Liu,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR, author = {Cao, Bin and Zheng, Sipeng and Luo, Hao and Li, Boyuan and Liu, Jing and Lu, Zongqing}, title = {OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30640-30649} }
DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities: Jueqing Lu,

Yuanyuan Qi,

Xiaohao Yang,

Shuaicheng Niu,

Fucai Ke,

Shujie Zhou,

Wei Tan,

Jionghao Lin,

Wray Buntine,

Hamid Rezatofighi,

Lan Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Jueqing and Qi, Yuanyuan and Yang, Xiaohao and Niu, Shuaicheng and Ke, Fucai and Zhou, Shujie and Tan, Wei and Lin, Jionghao and Buntine, Wray and Rezatofighi, Hamid and Du, Lan}, title = {DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39219-39229} }
Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data: Minyoung E. Kim,

Dae Hee Yun,

Aditi V. Patel,

Madeline Hon,

Webster Guan,

Taegeon Lee,

Brian Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Minyoung E. and Yun, Dae Hee and Patel, Aditi V. and Hon, Madeline and Guan, Webster and Lee, Taegeon and Nguyen, Brian}, title = {Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35270-35279} }
Unified Personalized Understanding, Generating and Editing: Yu Zhong,

Tianwei Lin,

Ruike Zhu,

Yuqian Yuan,

Haoyu Zheng,

Liang Liang,

Wenqiao Zhang,

Feifei Shao,

Haoyuan Li,

Wanggui He,

Hao Jiang,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yu and Lin, Tianwei and Zhu, Ruike and Yuan, Yuqian and Zheng, Haoyu and Liang, Liang and Zhang, Wenqiao and Shao, Feifei and Li, Haoyuan and He, Wanggui and Jiang, Hao and Zhuang, Yueting}, title = {Unified Personalized Understanding, Generating and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29400-29409} }
OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning: Yanqing Liu,

Xianhang Li,

Letian Zhang,

Zirui Wang,

Zeyu Zheng,

Yuyin Zhou,

Cihang Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yanqing and Li, Xianhang and Zhang, Letian and Wang, Zirui and Zheng, Zeyu and Zhou, Yuyin and Xie, Cihang}, title = {OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39164-39174} }
FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration: Muxi Chen,

Zhaohua Zhang,

Chenchen Zhao,

Mingyang Chen,

Wenyu Jiang,

Tianwen Jiang,

Jianhuan Zhuo,

Yu Tang,

Qiuyong Xiao,

Jihong Zhang,

Qiang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Muxi and Zhang, Zhaohua and Zhao, Chenchen and Chen, Mingyang and Jiang, Wenyu and Jiang, Tianwen and Zhuo, Jianhuan and Tang, Yu and Xiao, Qiuyong and Zhang, Jihong and Xu, Qiang}, title = {FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40782-40791} }
When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators: Krzysztof Adamkiewicz,

Brian B. Moser,

Stanislav Frolov,

Tobias Christian Nauen,

Federico Raue,

Andreas Dengel; [pdf] [supp]
[bibtex]
@InProceedings{Adamkiewicz_2026_CVPR, author = {Adamkiewicz, Krzysztof and Moser, Brian B. and Frolov, Stanislav and Nauen, Tobias Christian and Raue, Federico and Dengel, Andreas}, title = {When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36660-36669} }
Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding: Pengfei Hu,

Meng Cao,

Yingyao Wang,

Yi Wang,

Jiahua Dong,

Jun Song,

Yu Cheng,

Bo Zheng,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Pengfei and Cao, Meng and Wang, Yingyao and Wang, Yi and Dong, Jiahua and Song, Jun and Cheng, Yu and Zheng, Bo and Liang, Xiaodan},
  title     = {Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36255--36266}
}
RGB-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric RWKV Fusion Framework: Xiao Wang,

Haiyang Wang,

Shiao Wang,

Qiang Chen,

Jiandong Jin,

Haoyu Song,

Bo Jiang,

Chenglong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiao and Wang, Haiyang and Wang, Shiao and Chen, Qiang and Jin, Jiandong and Song, Haoyu and Jiang, Bo and Li, Chenglong},
  title     = {{RGB}-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric {RWKV} Fusion Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32745--32755}
}
R4: Retrieval-Augmented Reasoning for Vision-Language Models in 4D Spatio-Temporal Space: Tin Stribor Sohn,

Maximilian Dillitzer,

Jason J. Corso,

Eric Sax; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sohn_2026_CVPR,
  author    = {Sohn, Tin Stribor and Dillitzer, Maximilian and Corso, Jason J. and Sax, Eric},
  title     = {R4: Retrieval-Augmented Reasoning for Vision-Language Models in {4D} Spatio-Temporal Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38711--38721}
}
AERGS-SLAM: Auto-Exposure-Robust Stereo 3D Gaussian Splatting SLAM: Zhiyu Zhou,

Feng Hui,

Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zhiyu and Hui, Feng and Liu, Yu},
  title     = {{AERGS-SLAM}: Auto-Exposure-Robust Stereo {3D} {Gaussian} Splatting {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40929--40938}
}
Photo3D: Advancing Photorealistic 3D Generation through Structure-Aligned Detail Enhancement: Xinyue Liang,

Zhiyuan Ma,

Lingchen Sun,

Yanjun Guo,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xinyue and Ma, Zhiyuan and Sun, Lingchen and Guo, Yanjun and Zhang, Lei},
  title     = {{Photo3D}: Advancing Photorealistic {3D} Generation through Structure-Aligned Detail Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34237--34247}
}
LiREC-Net: A Target-Free and Learning-Based Network for LiDAR, RGB, and Event Calibration: Aditya Ranjan Dash,

Ramy Battrawy,

René Schuster,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dash_2026_CVPR,
  author    = {Dash, Aditya Ranjan and Battrawy, Ramy and Schuster, Ren{\'e} and Stricker, Didier},
  title     = {{LiREC-Net}: A Target-Free and Learning-Based Network for {LiDAR}, {RGB}, and Event Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31421--31429}
}
Drainage: A Unifying Framework for Addressing Class Uncertainty: Yasser Taha,

Grégoire Montavon,

Nils Körber; [pdf] [supp]
[bibtex]
@InProceedings{Taha_2026_CVPR,
  author    = {Taha, Yasser and Montavon, Gr{\'e}goire and K{\"o}rber, Nils},
  title     = {Drainage: A Unifying Framework for Addressing Class Uncertainty},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41934--41943}
}
Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance: Minxing Luo,

Linlong Fan,

Qiushi Wang,

Ge Wu,

Yiyan Luo,

Yuhang Yu,

Jinwei Chen,

Yaxing Wang,

Qingnan Fan,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Minxing and Fan, Linlong and Wang, Qiushi and Wu, Ge and Luo, Yiyan and Yu, Yuhang and Chen, Jinwei and Wang, Yaxing and Fan, Qingnan and Yang, Jian},
  title     = {Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30553--30563}
}
Evo-Retriever: LLM-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval: Weiqing Li,

Jinyue Guo,

Yaqi Wang,

Haiyang Xiao,

Yuewei Zhang,

Guohua Liu,

Hao Henry Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiqing and Guo, Jinyue and Wang, Yaqi and Xiao, Haiyang and Zhang, Yuewei and Liu, Guohua and Wang, Hao Henry},
  title     = {{Evo-Retriever}: {LLM}-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31113--31123}
}
G-MIXER: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval: Jiyoung Lim,

Heejae Yang,

Jee-Hyong Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Jiyoung and Yang, Heejae and Lee, Jee-Hyong},
  title     = {{G-MIXER}: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33891--33900}
}
Reconstructing CLIP for Open-Vocabulary Dense Perception: Yajie Liu,

Jinjin Zhang,

Qingjie Liu,

Di Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajie and Zhang, Jinjin and Liu, Qingjie and Huang, Di},
  title     = {Reconstructing {CLIP} for Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39208--39218}
}
Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based 3D Human Motion Editing: Gyojin Han,

Junmo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Gyojin and Kim, Junmo},
  title     = {Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based {3D} Human Motion Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30618--30628}
}
VIRD: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation: Juhye Park,

Wooju Lee,

Dasol Hong,

Changki Sung,

Youngwoo Seo,

Dongwan Kang,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Juhye and Lee, Wooju and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, Dongwan and Myung, Hyun},
  title     = {{VIRD}: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33693--33702}
}
TimeBridge: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction: Qin Wang,

Abigail Morrison,

Hanno Scharr,

Kai Krajsek; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qin and Morrison, Abigail and Scharr, Hanno and Krajsek, Kai},
  title     = {{TimeBridge}: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39647--39658}
}
Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning: Haoji Zhang,

Xin Gu,

Jiawen Li,

Chixiang Ma,

Sule Bai,

Chubin Zhang,

Bowen Zhang,

Zhichao Zhou,

Dongliang He,

Yansong Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haoji and Gu, Xin and Li, Jiawen and Ma, Chixiang and Bai, Sule and Zhang, Chubin and Zhang, Bowen and Zhou, Zhichao and He, Dongliang and Tang, Yansong},
  title     = {Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32903--32914}
}
High-Fidelity Mobile Avatars with Pruned Local Blendshapes: Youyi Zhan,

He Wang,

Tianjia Shao,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Youyi and Wang, He and Shao, Tianjia and Zhou, Kun},
  title     = {High-Fidelity Mobile Avatars with Pruned Local Blendshapes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32345--32356}
}
Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity: Boyuan Wang,

Richard Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Boyuan and Jiang, Richard},
  title     = {Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41944--41953}
}
ShapeAR: Generating Editable Shape Layers via Autoregressive Diffusion: Souymodip Chakraborty,

Ankur Singh,

Amit Vikram Singh,

Vineet Batra,

Ankit Phogat; [pdf]
[bibtex]
@InProceedings{Chakraborty_2026_CVPR,
  author    = {Chakraborty, Souymodip and Singh, Ankur and Singh, Amit Vikram and Batra, Vineet and Phogat, Ankit},
  title     = {{ShapeAR}: Generating Editable Shape Layers via Autoregressive Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40664--40673}
}
Towards Stable Federated Continual Test-Time Adaptation in Wild World: Liwen Wang,

Xingbo Dong,

Iman Yi Liao,

Zhe Jin; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Liwen and Dong, Xingbo and Liao, Iman Yi and Jin, Zhe},
  title     = {Towards Stable Federated Continual Test-Time Adaptation in Wild World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29451--29461}
}
Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration: Fengyang Xiao,

Peng Hu,

Lei Xu,

XingE Guo,

Guanyi Qin,

Yuqi Shen,

Chengyu Fang,

Rihan Zhang,

Chunming He,

Sina Farsiu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Fengyang and Hu, Peng and Xu, Lei and Guo, XingE and Qin, Guanyi and Shen, Yuqi and Fang, Chengyu and Zhang, Rihan and He, Chunming and Farsiu, Sina},
  title     = {Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29897--29908}
}
Hybrid Token Compression for Vision-Language Models: Jusheng Zhang,

Xiaoyang Guo,

Kaitong Cai,

Qinhan Lv,

Yijia Fan,

Wenhao Chai,

Jian Wang,

Keze Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Guo, Xiaoyang and Cai, Kaitong and Lv, Qinhan and Fan, Yijia and Chai, Wenhao and Wang, Jian and Wang, Keze},
  title     = {Hybrid Token Compression for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31889--31899}
}
Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition: Hao Zhou,

Tingjin Luo; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Hao and Luo, Tingjin},
  title     = {Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40866--40875}
}
Parameter-Efficient Adaptation for MLLMs via Implicit Modality Decomposition: Mingfang Zhang,

Yunhong Wang,

Lu Wang,

Jiaxin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mingfang and Wang, Yunhong and Wang, Lu and Chen, Jiaxin},
  title     = {Parameter-Efficient Adaptation for {MLLMs} via Implicit Modality Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37745--37755}
}
Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again: Weize Li,

Yunhao Du,

Qixiang Yin,

Zhicheng Zhao,

Fei Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weize and Du, Yunhao and Yin, Qixiang and Zhao, Zhicheng and Su, Fei},
  title     = {Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42549--42559}
}
StreamDiT: Real-Time Streaming Text-to-Video Generation: Akio Kodaira,

Tingbo Hou,

Ji Hou,

Markos Georgopoulos,

Felix Juefei-Xu,

Masayoshi Tomizuka,

Yue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kodaira_2026_CVPR,
  author    = {Kodaira, Akio and Hou, Tingbo and Hou, Ji and Georgopoulos, Markos and Juefei-Xu, Felix and Tomizuka, Masayoshi and Zhao, Yue},
  title     = {{StreamDiT}: Real-Time Streaming Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29200--29210}
}
Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models: Ci Zhang,

Zhaojun Ding,

Chence Yang,

Jun Liu,

Xiaoming Zhai,

Shaoyi Huang,

Beiwen Li,

Xiaolong Ma,

Jin Lu,

Geng Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ci and Ding, Zhaojun and Yang, Chence and Liu, Jun and Zhai, Xiaoming and Huang, Shaoyi and Li, Beiwen and Ma, Xiaolong and Lu, Jin and Yuan, Geng},
  title     = {Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35872--35881}
}
Efficient Unrolled Networks for Large-Scale 3D Inverse Problems: Romain Vo,

Julián Tachella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vo_2026_CVPR,
  author    = {Vo, Romain and Tachella, Juli{\'a}n},
  title     = {Efficient Unrolled Networks for Large-Scale {3D} Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36343--36353}
}
See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning: Shuoshuo Zhang,

Yizhen Zhang,

Jingjing Fu,

Lei Song,

Jiang Bian,

Yujiu Yang,

Rui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shuoshuo and Zhang, Yizhen and Fu, Jingjing and Song, Lei and Bian, Jiang and Yang, Yujiu and Wang, Rui},
  title     = {See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33499--33509}
}
Mesh-Pro: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation: Zhen Zhou,

Jian Liu,

Biwen Lei,

Jing Xu,

Haohan Weng,

Yiling Zhu,

Zhuo Chen,

Junfeng Fan,

Yunkai Ma,

Dazhao Du,

Song Guo,

Fengshui Jing,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zhen and Liu, Jian and Lei, Biwen and Xu, Jing and Weng, Haohan and Zhu, Yiling and Chen, Zhuo and Fan, Junfeng and Ma, Yunkai and Du, Dazhao and Guo, Song and Jing, Fengshui and Guo, Chunchao},
  title     = {{Mesh-Pro}: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34248--34258}
}
Causality in Video Diffusers is Separable from Denoising: Xingjian Bai,

Guande He,

Zhengqi Li,

Eli Shechtman,

Xun Huang,

Zongze Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Xingjian and He, Guande and Li, Zhengqi and Shechtman, Eli and Huang, Xun and Wu, Zongze},
  title     = {Causality in Video Diffusers is Separable from Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43373--43384}
}
X-PCR: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis: Gui Wang,

Zehao Zhong,

YongSong Zhou,

Yudong Li,

Ende Wu,

Wooi Ping Cheah,

Rong Qu,

Jianfeng Ren,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Gui and Zhong, Zehao and Zhou, YongSong and Li, Yudong and Wu, Ende and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin},
  title     = {{X-PCR}: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33110--33120}
}
Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation: Yunkai Yang,

Yudong Zhang,

Kunquan Zhang,

Jinxiao Zhang,

Xinying Chen,

Haohuan Fu,

Runmin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yunkai and Zhang, Yudong and Zhang, Kunquan and Zhang, Jinxiao and Chen, Xinying and Fu, Haohuan and Dong, Runmin},
  title     = {Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42147--42157}
}
Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress: Yuelin Zhang,

Sijie Cheng,

Chen Li,

Zongzhao Li,

Yuxin Huang,

Yang Liu,

Wenbing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuelin and Cheng, Sijie and Li, Chen and Li, Zongzhao and Huang, Yuxin and Liu, Yang and Huang, Wenbing},
  title     = {Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41150--41159}
}
SRA 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training: Mengmeng Wang,

Dengyang Jiang,

Liuzhuozheng Li,

Yucheng Lin,

Guojiang Shen,

Xiangjie Kong,

Yong Liu,

Guang Dai,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Mengmeng and Jiang, Dengyang and Li, Liuzhuozheng and Lin, Yucheng and Shen, Guojiang and Kong, Xiangjie and Liu, Yong and Dai, Guang and Wang, Jingdong},
  title     = {{SRA} 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32978--32987}
}
InvCoSS: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training: Zihao Luo,

Shaohao Rui,

Zhenyu Tang,

Guotai Wang,

Xiaosong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Zihao and Rui, Shaohao and Tang, Zhenyu and Wang, Guotai and Wang, Xiaosong},
  title     = {{InvCoSS}: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42626--42636}
}
Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding: Shawn Huang,

Brian Price,

Yifei Fan,

Bryan Morse; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Shawn and Price, Brian and Fan, Yifei and Morse, Bryan},
  title     = {Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38564--38573}
}
AToken: A Unified Tokenizer for Vision: Jiasen Lu,

Liangchen Song,

Mingze Xu,

Byeongjoo Ahn,

Yanjun Wang,

Chen Chen,

Afshin Dehghan,

Yinfei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jiasen and Song, Liangchen and Xu, Mingze and Ahn, Byeongjoo and Wang, Yanjun and Chen, Chen and Dehghan, Afshin and Yang, Yinfei},
  title     = {{AToken}: A Unified Tokenizer for Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28701--28711}
}
Urban-GS: A Unified 3D Gaussian Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction: Meng Wang,

Changqun Xia,

Yuze Wang,

Junyi Wang,

Wantong Duan,

Xinxiong Xie,

Yue Qi; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meng and Xia, Changqun and Wang, Yuze and Wang, Junyi and Duan, Wantong and Xie, Xinxiong and Qi, Yue},
  title     = {{Urban-GS}: A Unified {3D} {Gaussian} Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33207--33216}
}
Bridging RGB and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation: Zihan Huan,

Xipeng Pan,

Hualong Zhang,

Siyang Feng,

Rushi Lan,

Huadeng Wang,

Haoxiang Lu,

Zhenbing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huan_2026_CVPR,
  author    = {Huan, Zihan and Pan, Xipeng and Zhang, Hualong and Feng, Siyang and Lan, Rushi and Wang, Huadeng and Lu, Haoxiang and Liu, Zhenbing},
  title     = {Bridging {RGB} and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37518--37527}
}
Masking Matters: Unlocking the Spatial Reasoning Capabilities of LLMs for 3D Scene-Language Understanding: Yerim Jeon,

Miso Lee,

WonJun Moon,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Yerim and Lee, Miso and Moon, WonJun and Heo, Jae-Pil},
  title     = {Masking Matters: Unlocking the Spatial Reasoning Capabilities of {LLMs} for {3D} Scene-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38668--38677}
}
PriVi: Towards a General-Purpose Video Model for Primate Behavior in the Wild: Felix B. Mueller,

Jan F. Meier,

Timo Lueddecke,

Richard Vogg,

Roger L. Freixanet,

Valentin Hassler,

Tiffany Bosshard,

Elif Karakoc,

William J. O'Hearn,

Sofia M. Pereira,

Sandro Sehner,

Kaja Wierucka,

Judith Burkart,

Claudia Fichtel,

Julia Fischer,

Alexander Gail,

Catherine Hobaiter,

Julia Ostner,

Liran Samuni,

Oliver Schülke,

Neda Shahidi,

Erin G. Wessling,

Alexander S. Ecker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mueller_2026_CVPR,
  author    = {Mueller, Felix B. and Meier, Jan F. and Lueddecke, Timo and Vogg, Richard and Freixanet, Roger L. and Hassler, Valentin and Bosshard, Tiffany and Karakoc, Elif and O'Hearn, William J. and Pereira, Sofia M. and Sehner, Sandro and Wierucka, Kaja and Burkart, Judith and Fichtel, Claudia and Fischer, Julia and Gail, Alexander and Hobaiter, Catherine and Ostner, Julia and Samuni, Liran and Sch{\"u}lke, Oliver and Shahidi, Neda and Wessling, Erin G. and Ecker, Alexander S.},
  title     = {{PriVi}: Towards a General-Purpose Video Model for Primate Behavior in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38804--38815}
}
CHIPS: Efficient CLIP Adaptation via Curvature-aware Hybrid Influence-based Data Selection: Xinlin Zhuang,

Yichen Li,

Xiwei Liu,

Haolin Yang,

Yifan Lu,

Ziyun Zou,

Yulong Li,

Huifa Li,

Dongliang Chen,

Qinglei Wang,

Weiyang Liu,

Ying Qian,

Jiangming Shi,

Imran Razzak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xinlin and Li, Yichen and Liu, Xiwei and Yang, Haolin and Lu, Yifan and Zou, Ziyun and Li, Yulong and Li, Huifa and Chen, Dongliang and Wang, Qinglei and Liu, Weiyang and Qian, Ying and Shi, Jiangming and Razzak, Imran},
  title     = {{CHIPS}: Efficient {CLIP} Adaptation via Curvature-aware Hybrid Influence-based Data Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29483--29493}
}
DiP: Taming Diffusion Models in Pixel Space: Zhennan Chen,

Junwei Zhu,

Xu Chen,

Jiangning Zhang,

Xiaobin Hu,

Hanzhen Zhao,

Chengjie Wang,

Jian Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhennan and Zhu, Junwei and Chen, Xu and Zhang, Jiangning and Hu, Xiaobin and Zhao, Hanzhen and Wang, Chengjie and Yang, Jian and Tai, Ying},
  title     = {{DiP}: Taming Diffusion Models in Pixel Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36136--36146}
}
ReasonX: MLLM-Guided Intrinsic Image Decomposition: Alara Dirik,

Tuanfeng Yang Wang,

Duygu Ceylan,

Stefanos Zafeiriou,

Anna Frühstück; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dirik_2026_CVPR,
  author    = {Dirik, Alara and Wang, Tuanfeng Yang and Ceylan, Duygu and Zafeiriou, Stefanos and Fr{\"u}hst{\"u}ck, Anna},
  title     = {{ReasonX}: {MLLM}-Guided Intrinsic Image Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30802--30812}
}
ReLaX: Reasoning with Latent Exploration for Large Reasoning Models: Shimin Zhang,

Xianwei Chen,

Yufan Shen,

Ziyuan Ye,

Jibin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shimin and Chen, Xianwei and Shen, Yufan and Ye, Ziyuan and Wu, Jibin},
  title     = {{ReLaX}: Reasoning with Latent Exploration for Large Reasoning Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33761--33771}
}
BrickNet: Graph-Backed Generative Brick Assembly: Peter Kulits,

Cordelia Schmid; [pdf] [arXiv]
[bibtex]
@InProceedings{Kulits_2026_CVPR,
  author    = {Kulits, Peter and Schmid, Cordelia},
  title     = {{BrickNet}: Graph-Backed Generative Brick Assembly},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39252--39261}
}
Diffusion Mental Averages: Phonphrm Thawatdamrongkit,

Sukit Seripanitkarn,

Supasorn Suwajanakorn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thawatdamrongkit_2026_CVPR,
  author    = {Thawatdamrongkit, Phonphrm and Seripanitkarn, Sukit and Suwajanakorn, Supasorn},
  title     = {Diffusion Mental Averages},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35713--35725}
}
SFR-Net: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection: Zhao-Min Chen,

Xinjian Huang,

Yisu Ge,

Yu Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhao-Min and Huang, Xinjian and Ge, Yisu and Li, Yu},
  title     = {{SFR-Net}: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41636--41645}
}
LumiMotion: Improving Gaussian Relighting with Scene Dynamics: Joanna Kaleta,

Piotr Wójcik,

Kacper Marzol,

Tomasz Trzcinski,

Kacper Kania,

Marek Kowalski; [pdf] [supp]
[bibtex]
@InProceedings{Kaleta_2026_CVPR,
  author    = {Kaleta, Joanna and W{\'o}jcik, Piotr and Marzol, Kacper and Trzcinski, Tomasz and Kania, Kacper and Kowalski, Marek},
  title     = {{LumiMotion}: Improving {Gaussian} Relighting with Scene Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37311--37321}
}
D2Dewarp: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping: Heng Li,

Xiangping Wu,

Qingcai Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Heng and Wu, Xiangping and Chen, Qingcai},
  title     = {{D2Dewarp}: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34734--34744}
}
SegMoTE: Token-Level Mixture of Experts for Medical Image Segmentation: Yujie Lu,

Jingwen Li,

Sibo Ju,

Yanzhou Su,

He Yao,

Yisong Liu,

Min Zhu,

Junlong Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yujie and Li, Jingwen and Ju, Sibo and Su, Yanzhou and Yao, He and Liu, Yisong and Zhu, Min and Cheng, Junlong},
  title     = {{SegMoTE}: Token-Level Mixture of Experts for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36332--36342}
}
STAR: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models: Yiwei Fu,

Hui Wan,

Xiao Luo,

Minghua Deng; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yiwei and Wan, Hui and Luo, Xiao and Deng, Minghua},
  title     = {{STAR}: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31482--31492}
}
Semantic Context Matters: Improving Conditioning for Autoregressive Models: Dongyang Jin,

Ryan Xu,

Jianhao Zeng,

Rui Lan,

Yancheng Bai,

Lei Sun,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Dongyang and Xu, Ryan and Zeng, Jianhao and Lan, Rui and Bai, Yancheng and Sun, Lei and Chu, Xiangxiang},
  title     = {Semantic Context Matters: Improving Conditioning for Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30402--30413}
}
LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging: Zhijian Shu,

Cheng Lin,

Tao Xie,

Wei Yin,

Ben Li,

Zhiyuan Pu,

Weize Li,

Yao Yao,

Xun Cao,

Xiaoyang Guo,

Xiao-Xiao Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shu_2026_CVPR, author = {Shu, Zhijian and Lin, Cheng and Xie, Tao and Yin, Wei and Li, Ben and Pu, Zhiyuan and Li, Weize and Yao, Yao and Cao, Xun and Guo, Xiaoyang and Long, Xiao-Xiao}, title = {LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36422-36432} }
Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation: Jun Jia,

Hongyi Miao,

Yingjie Zhou,

Wangqiu Zhou,

Jianbo Zhang,

Linhan Cao,

Dandan Zhu,

Hua Yang,

Xiongkuo Min,

Wei Sun,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Jun and Miao, Hongyi and Zhou, Yingjie and Zhou, Wangqiu and Zhang, Jianbo and Cao, Linhan and Zhu, Dandan and Yang, Hua and Min, Xiongkuo and Sun, Wei and Zhai, Guangtao}, title = {Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30120-30129} }
Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning: Haozhen Gong,

Xiaozhong Ji,

Yuansen Liu,

Wenbin Wu,

Xiaoxiao Yan,

Jingjing Liu,

Kai Wu,

Jiazhen Pan,

Bailiang Jian,

Jiangning Zhang,

Xiaobin Hu,

Hongwei Bran Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR, author = {Gong, Haozhen and Ji, Xiaozhong and Liu, Yuansen and Wu, Wenbin and Yan, Xiaoxiao and Liu, Jingjing and Wu, Kai and Pan, Jiazhen and Jian, Bailiang and Zhang, Jiangning and Hu, Xiaobin and Li, Hongwei Bran}, title = {Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41224-41234} }
Improving Sparse Autoencoder with Dynamic Attention: Dongsheng Wang,

Jinsen Zhang,

Dawei Su,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Dongsheng and Zhang, Jinsen and Su, Dawei and Huang, Hui}, title = {Improving Sparse Autoencoder with Dynamic Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41996-42006} }
SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls: Qianxun Xu,

Chenxi Song,

Yujun Cai,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Qianxun and Song, Chenxi and Cai, Yujun and Zhang, Chi}, title = {SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29136-29145} }
Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound: Hyeonggon Ryu,

Joon Son Chung,

David Harwath; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2026_CVPR, author = {Ryu, Hyeonggon and Chung, Joon Son and Harwath, David}, title = {Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38606-38615} }
OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation: Henry Herzog,

Favyen Bastani,

Yawen Zhang,

Gabriel Tseng,

Joseph Redmon,

Hadrien Sablon,

Ryan Park,

Jacob Morrison,

Alexandra Buraczynski,

Karen Farley,

Josh Hansen,

Andrew Howe,

Patrick Alan Johnson,

Mark Otterlee,

Ted Schmitt,

Hunter Pitelka,

Stephen Daspit,

Rachel Ratner,

Christopher Wilhelm,

Sebastian Wood,

Mike Jacobi,

Hannah Kerner,

Evan Shelhamer,

Ali Farhadi,

Ranjay Krishna,

Patrick Beukema; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herzog_2026_CVPR, author = {Herzog, Henry and Bastani, Favyen and Zhang, Yawen and Tseng, Gabriel and Redmon, Joseph and Sablon, Hadrien and Park, Ryan and Morrison, Jacob and Buraczynski, Alexandra and Farley, Karen and Hansen, Josh and Howe, Andrew and Johnson, Patrick Alan and Otterlee, Mark and Schmitt, Ted and Pitelka, Hunter and Daspit, Stephen and Ratner, Rachel and Wilhelm, Christopher and Wood, Sebastian and Jacobi, Mike and Kerner, Hannah and Shelhamer, Evan and Farhadi, Ali and Krishna, Ranjay and Beukema, Patrick}, title = {OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34806-34817} }
OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks: Zhihao Peng,

Cheng Wang,

Shengyuan Liu,

Zhiying Liang,

Zanting Ye,

Min Jie Ju,

Peter YM Woo,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Zhihao and Wang, Cheng and Liu, Shengyuan and Liang, Zhiying and Ye, Zanting and Ju, Min Jie and Woo, Peter YM and Yuan, Yixuan}, title = {OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42732-42743} }
Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval: Yuxin Yang,

Yinan Zhou,

Yuxin Chen,

Ziqi Zhang,

Zongyang Ma,

Chunfeng Yuan,

Bing Li,

Jun Gao,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Yuxin and Zhou, Yinan and Chen, Yuxin and Zhang, Ziqi and Ma, Zongyang and Yuan, Chunfeng and Li, Bing and Gao, Jun and Hu, Weiming}, title = {Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31155-31165} }
UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders: Matthew Walmer,

Saksham Suri,

Anirud Aggarwal,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walmer_2026_CVPR, author = {Walmer, Matthew and Suri, Saksham and Aggarwal, Anirud and Shrivastava, Abhinav}, title = {UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41288-41298} }
Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning: Jinge Ma,

Fengqing Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Jinge and Zhu, Fengqing}, title = {Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32299-32308} }
Global-Aware Edge Prioritization for Pose Graph Initialization: Tong Wei,

Giorgos Tolias,

Jiri Matas,

Daniel Barath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Tong and Tolias, Giorgos and Matas, Jiri and Barath, Daniel}, title = {Global-Aware Edge Prioritization for Pose Graph Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28642-28651} }
Geometry-driven OOD Detectors Are Class-Incremental Learners: Wangwang Jia,

Zijian Gao,

Tianjiao Wan,

Yuan Cao,

Yong Dou,

Kele Xu; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Wangwang and Gao, Zijian and Wan, Tianjiao and Cao, Yuan and Dou, Yong and Xu, Kele}, title = {Geometry-driven OOD Detectors Are Class-Incremental Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34638-34649} }
Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning: Yingkai Zhang,

Tao Zhang,

Jing Nie,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yingkai and Zhang, Tao and Nie, Jing and Fu, Ying}, title = {Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41573-41583} }
Saliency-Driven Token Merging for Vision Transformers: Weiying Xie,

Xiaoyu Chen,

Xin Zhang,

Chenhe Hao,

Jitao Ma,

Yunsong Li,

Leyuan Fang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Weiying and Chen, Xiaoyu and Zhang, Xin and Hao, Chenhe and Ma, Jitao and Li, Yunsong and Fang, Leyuan}, title = {Saliency-Driven Token Merging for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32184-32193} }
A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection: SuYeon Kim,

Wongyu Lee,

MyeongAh Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, SuYeon and Lee, Wongyu and Cho, MyeongAh}, title = {A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33036-33045} }
AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs: Shuhan Xia,

Peipei Li,

Xuannan Liu,

Dongsen Zhang,

Xinyu Guo,

Zekun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Shuhan and Li, Peipei and Liu, Xuannan and Zhang, Dongsen and Guo, Xinyu and Li, Zekun}, title = {AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35416-35426} }
Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport: Khushboo Mishra,

Varun Trivedi,

Tanima Dutta; [pdf] [supp]
[bibtex]
@InProceedings{Mishra_2026_CVPR, author = {Mishra, Khushboo and Trivedi, Varun and Dutta, Tanima}, title = {Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29421-29430} }
What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?: David Yan,

Alexander Raistrick,

Jia Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, David and Raistrick, Alexander and Deng, Jia}, title = {What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34171-34180} }
GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation: Zhenya Yang,

Zhe Liu,

Yuxiang Lu,

Liping Hou,

Chenxuan Miao,

Siyi Peng,

Bailan Feng,

Xiang Bai,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zhenya and Liu, Zhe and Lu, Yuxiang and Hou, Liping and Miao, Chenxuan and Peng, Siyi and Feng, Bailan and Bai, Xiang and Zhao, Hengshuang}, title = {GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35680-35690} }
Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop: Xinwang Chen,

Xiuxing Li,

Qing Li,

Ziyue Zhuang,

Yutong Wu,

Ziyu Li,

Zhuo Wang,

Kai Li,

Jianye Hao,

Xia Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Xinwang and Li, Xiuxing and Li, Qing and Zhuang, Ziyue and Wu, Yutong and Li, Ziyu and Wang, Zhuo and Li, Kai and Hao, Jianye and Wu, Xia}, title = {Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41235-41244} }
Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation: Gal Fiebelman,

Hadar Averbuch-Elor,

Sagie Benaim; [pdf] [supp]
[bibtex]
@InProceedings{Fiebelman_2026_CVPR, author = {Fiebelman, Gal and Averbuch-Elor, Hadar and Benaim, Sagie}, title = {Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37322-37331} }
Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting: Jaejin Lee,

Minjae Jeong,

Joonhyuk Park,

Yechan Hwang,

Seunghun Baek,

Won Hwa Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Jaejin and Jeong, Minjae and Park, Joonhyuk and Hwang, Yechan and Baek, Seunghun and Kim, Won Hwa}, title = {Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39393-39402} }
3D Gaussian Splatting from Unposed Spike Stream: Yijia Guo,

Tong Hu,

Liwen Hu,

Lei Ma,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Yijia and Hu, Tong and Hu, Liwen and Ma, Lei and Huang, Tiejun}, title = {3D Gaussian Splatting from Unposed Spike Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41002-41011} }
WaTeRFlow: Watermark Temporal Robustness via Flow Consistency: Utae Jeong,

Sumin In,

Hyunju Ryu,

Jaewan Choi,

Feng Yang,

Jongheon Jeong,

Seungryong Kim,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR, author = {Jeong, Utae and In, Sumin and Ryu, Hyunju and Choi, Jaewan and Yang, Feng and Jeong, Jongheon and Kim, Seungryong and Kim, Sangpil}, title = {WaTeRFlow: Watermark Temporal Robustness via Flow Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31703-31713} }
Post-training Feature Pruning for Fundus Images Classification: Van-Nguyen Pham,

Duc-Tai Le,

Junghyun Bum,

Hyunseung Choo; [pdf] [supp]
[bibtex]
@InProceedings{Pham_2026_CVPR, author = {Pham, Van-Nguyen and Le, Duc-Tai and Bum, Junghyun and Choo, Hyunseung}, title = {Post-training Feature Pruning for Fundus Images Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37590-37599} }
LoL: Longer than Longer, Scaling Video Generation to Hour: Justin Cui,

Jie Wu,

Ming Li,

Tao Yang,

Xiaojie Li,

Rui Wang,

Andrew Bai,

Yuanhao Ban,

Cho-Jui Hsieh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR, author = {Cui, Justin and Wu, Jie and Li, Ming and Yang, Tao and Li, Xiaojie and Wang, Rui and Bai, Andrew and Ban, Yuanhao and Hsieh, Cho-Jui}, title = {LoL: Longer than Longer, Scaling Video Generation to Hour}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38132-38142} }
Markovian Scale Prediction: A New Era of Visual Autoregressive Generation: Yu Zhang,

Jingyi Liu,

Yiwei Shi,

Qi Zhang,

Duoqian Miao,

Changwei Wang,

Longbing Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yu and Liu, Jingyi and Shi, Yiwei and Zhang, Qi and Miao, Duoqian and Wang, Changwei and Cao, Longbing}, title = {Markovian Scale Prediction: A New Era of Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41266-41277} }
See Through the Noise: Improving Domain Generalization in Gaze Estimation: Yanming Peng,

Shijing Wang,

Yaping Huang,

Yi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Yanming and Wang, Shijing and Huang, Yaping and Tian, Yi}, title = {See Through the Noise: Improving Domain Generalization in Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31346-31355} }
RAAS: LLM Agentic System Architecture Search with GRPO: Jiayi Yang,

Guancheng Wan,

Man Zhang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Jiayi and Wan, Guancheng and Zhang, Man and Ye, Mang}, title = {RAAS: LLM Agentic System Architecture Search with GRPO}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34470-34479} }
VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation: Longteng Jiang,

DanDan Zheng,

Qianqian Qiao,

Heng Huang,

Huaye Wang,

Yihang Bo,

Bao Peng,

Jingdong Chen,

Jun Zhou,

Xin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Longteng and Zheng, DanDan and Qiao, Qianqian and Huang, Heng and Wang, Huaye and Bo, Yihang and Peng, Bao and Chen, Jingdong and Zhou, Jun and Jin, Xin}, title = {VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30457-30466} }
GROW: Watermark Generation with Progressive Guidance for Diffusion Models: Pengcheng Luo,

Zexi Jia,

Yijia Zhong,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Pengcheng and Jia, Zexi and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie}, title = {GROW: Watermark Generation with Progressive Guidance for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35978-35987} }
Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens: Yuqing Wang,

Chuofan Ma,

Zhijie Lin,

Yao Teng,

Lijun Yu,

Shuai Wang,

Jiaming Han,

Jiashi Feng,

Yi Jiang,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yuqing and Ma, Chuofan and Lin, Zhijie and Teng, Yao and Yu, Lijun and Wang, Shuai and Han, Jiaming and Feng, Jiashi and Jiang, Yi and Liu, Xihui}, title = {Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36072-36081} }
Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos: Mengmeng Ge,

Takashi Isobe,

Xu Jia,

Yanan Sun,

Zetong Yang,

Weinong Wang,

Dong Zhou,

Dong Li,

Huchuan Lu,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2026_CVPR, author = {Ge, Mengmeng and Isobe, Takashi and Jia, Xu and Sun, Yanan and Yang, Zetong and Wang, Weinong and Zhou, Dong and Li, Dong and Lu, Huchuan and Barsoum, Emad}, title = {Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43145-43154} }
RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning: Ehsan Ahmadi,

Hunter Schofield,

Behzad Khamidehi,

Fazel Arasteh,

Jinjun Shan,

Lili Mou,

Dongfeng Bai,

Kasra Rezaee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahmadi_2026_CVPR, author = {Ahmadi, Ehsan and Schofield, Hunter and Khamidehi, Behzad and Arasteh, Fazel and Shan, Jinjun and Mou, Lili and Bai, Dongfeng and Rezaee, Kasra}, title = {RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39734-39743} }
E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought: Meiqi Sun,

Mingyu Li,

Junxiong Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Meiqi and Li, Mingyu and Zhu, Junxiong}, title = {E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30941-30951} }
Progressive Mask Distillation for Self-supervised Video Representation: Kewei Wu,

Chong Liang,

Zhao Xie,

Dan Guo; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Kewei and Liang, Chong and Xie, Zhao and Guo, Dan}, title = {Progressive Mask Distillation for Self-supervised Video Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41677-41687} }
Computational Speckle Pattern Interferometry: Shengxi Wu,

Sophia Yang,

Dorian Chan,

Matthew O'Toole; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Shengxi and Yang, Sophia and Chan, Dorian and O'Toole, Matthew}, title = {Computational Speckle Pattern Interferometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41710-41719} }
OS-Fed: One Snapshot Is All You Need: Xuwei Qian,

Jinghui Zhang,

Yuchuan Tan,

Wenbo Huang,

Zhen Wu,

Shen Zhou,

LiSha Gao,

Ding Ding,

Fang Dong; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR, author = {Qian, Xuwei and Zhang, Jinghui and Tan, Yuchuan and Huang, Wenbo and Wu, Zhen and Zhou, Shen and Gao, LiSha and Ding, Ding and Dong, Fang}, title = {OS-Fed: One Snapshot Is All You Need}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31758-31768} }
Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation: Kaichao Jiang,

He Wang,

Xiaoshuai Hao,

Xiulong Yang,

Ajian Liu,

Qi Chu,

Yunfeng Diao,

Richang Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Kaichao and Wang, He and Hao, Xiaoshuai and Yang, Xiulong and Liu, Ajian and Chu, Qi and Diao, Yunfeng and Hong, Richang}, title = {Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42310-42320} }
FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories: Lei Ke,

Hubery Yin,

Gongye Liu,

Zhengyao Lv,

Jingcai Guo,

Chen Li,

Wenhan Luo,

Yujiu Yang,

Jing Lyu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR, author = {Ke, Lei and Yin, Hubery and Liu, Gongye and Lv, Zhengyao and Guo, Jingcai and Li, Chen and Luo, Wenhan and Yang, Yujiu and Lyu, Jing}, title = {FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30381-30390} }
Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation: Zirui Xu,

Xianhang Chu,

Jiahao Li,

Xu Yang,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Zirui and Chu, Xianhang and Li, Jiahao and Yang, Xu and Deng, Cheng}, title = {Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39901-39910} }
Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding: Mincheol Kwon,

Minseung Lee,

Seonga Choi,

Miso Choi,

Kyeongjin Oh,

Hyunyoung Lee,

Cheonyoung Park,

Yongho Song,

Seunghyun Park,

Jinkyu Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kwon_2026_CVPR, author = {Kwon, Mincheol and Lee, Minseung and Choi, Seonga and Choi, Miso and Oh, Kyeongjin and Lee, Hyunyoung and Park, Cheonyoung and Song, Yongho and Park, Seunghyun and Kim, Jinkyu}, title = {Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31900-31909} }
UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration: Zihan Cheng,

Liangtai Zhou,

Dian Chen,

Ni Tang,

Xiaotong Luo,

Yuan Xie,

Yanyun Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zihan and Zhou, Liangtai and Chen, Dian and Tang, Ni and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun}, title = {UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37465-37475} }
Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition: Shaowu Xu,

Xibin Jia,

Chao Fan,

Junyu Gao,

Jing Chang,

Qianmei Sun; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Shaowu and Jia, Xibin and Fan, Chao and Gao, Junyu and Chang, Jing and Sun, Qianmei}, title = {Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31186-31195} }
Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation: Ruiying Liu,

Yuanzhi Liang,

Haibin Huang,

Tianshu Yu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Ruiying and Liang, Yuanzhi and Huang, Haibin and Yu, Tianshu and Zhang, Chi}, title = {Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34408-34417} }
STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution: Junyang Chen,

Jiangxin Dong,

Long Sun,

Yixin Yang,

Jinshan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Junyang and Dong, Jiangxin and Sun, Long and Yang, Yixin and Pan, Jinshan}, title = {STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38281-38290} }
OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis: Junuk Cha,

Jihyeon Kim,

Han-Mu Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2026_CVPR, author = {Cha, Junuk and Kim, Jihyeon and Park, Han-Mu}, title = {OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30707-30717} }
TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection: Rong Xu,

Runqi Wang,

Yingjun Zhang,

Tao Tao,

Xiaomeng Li,

Liping Jing; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Rong and Wang, Runqi and Zhang, Yingjun and Tao, Tao and Li, Xiaomeng and Jing, Liping}, title = {TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35597-35606} }
SounDiT: Geo-Contextual Soundscape-to-Landscape Generation: Junbo Wang,

Haofeng Tan,

Bowen Liao,

Albert Jiang,

Teng Fei,

Qixing Huang,

Bing Zhou,

Zhengzhong Tu,

Shan Ye,

Yuhao Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Junbo and Tan, Haofeng and Liao, Bowen and Jiang, Albert and Fei, Teng and Huang, Qixing and Zhou, Bing and Tu, Zhengzhong and Ye, Shan and Kang, Yuhao}, title = {SounDiT: Geo-Contextual Soundscape-to-Landscape Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32659-32670} }
EXOTIC: External Vision-driven Incomplete Multi-view Classification: Shilin Xu,

Dezhong Peng,

Zhenwen Ren,

Yuan Sun; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Shilin and Peng, Dezhong and Ren, Zhenwen and Sun, Yuan}, title = {EXOTIC: External Vision-driven Incomplete Multi-view Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30216-30225} }
FINER: MLLMs Hallucinate under Fine-grained Negative Queries: Rui Xiao,

Sanghwan Kim,

Yongqin Xian,

Zeynep Akata,

Stephan Alaniz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Rui and Kim, Sanghwan and Xian, Yongqin and Akata, Zeynep and Alaniz, Stephan}, title = {FINER: MLLMs Hallucinate under Fine-grained Negative Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36235-36244} }
MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification: Sangwoon Kwak,

Weeyoung Kwon,

Jun Young Jeong,

Geonho Kim,

Won-Sik Cheong,

Jihyong Oh; [pdf] [supp]
[bibtex]
@InProceedings{Kwak_2026_CVPR, author = {Kwak, Sangwoon and Kwon, Weeyoung and Jeong, Jun Young and Kim, Geonho and Cheong, Won-Sik and Oh, Jihyong}, title = {MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37370-37379} }
CoV-Align: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority: Hengqi Liu,

Wanting Zhou,

Longteng Kong,

Fangxiang Feng,

Lei Ren,

Wei Chen,

Xiaojie Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hengqi and Zhou, Wanting and Kong, Longteng and Feng, Fangxiang and Ren, Lei and Chen, Wei and Wang, Xiaojie},
  title     = {{CoV-Align}: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36828--36837},
}
Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding: Sosuke Yamao,

Natsuki Miyahara,

Yuankai Qi,

Shun Takeuchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamao_2026_CVPR,
  author    = {Yamao, Sosuke and Miyahara, Natsuki and Qi, Yuankai and Takeuchi, Shun},
  title     = {Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32850--32859},
}
A Difference-in-Difference Approach to Detecting AI-Generated Images: Xinyi Qi,

Kai Ye,

Chengchun Shi,

Ying Yang,

Jin Zhu,

Hongyi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Xinyi and Ye, Kai and Shi, Chengchun and Yang, Ying and Zhu, Jin and Zhou, Hongyi},
  title     = {A Difference-in-Difference Approach to Detecting {AI}-Generated Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42964--42975},
}
CompetitorFormer: Mitigating Query Conflicts for 3D Instance Segmentation via Competitive Strategy: Duanchu Wang,

Junjie Yang,

Haoran Gong,

Jing Liu,

Di Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Duanchu and Yang, Junjie and Gong, Haoran and Liu, Jing and Wang, Di},
  title     = {{CompetitorFormer}: Mitigating Query Conflicts for {3D} Instance Segmentation via Competitive Strategy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34724--34733},
}
Ego-Grounding for Personalized Question-Answering in Egocentric Videos: Junbin Xiao,

Shenglang Zhang,

Pengxiang Zhu,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Junbin and Zhang, Shenglang and Zhu, Pengxiang and Yao, Angela},
  title     = {Ego-Grounding for Personalized Question-Answering in Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40537--40547},
}
MERIT: Multi-domain Efficient RAW Image Translation: Wenjun Huang,

Shenghao Fu,

Yian Jin,

Yang Ni,

Ziteng Cui,

Hanning Chen,

Yirui He,

Yezi Liu,

Sanggeon Yun,

SungHeon Jeong,

Ryozo Masukawa,

William Youngwoo Chung,

Mohsen Imani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wenjun and Fu, Shenghao and Jin, Yian and Ni, Yang and Cui, Ziteng and Chen, Hanning and He, Yirui and Liu, Yezi and Yun, Sanggeon and Jeong, SungHeon and Masukawa, Ryozo and Chung, William Youngwoo and Imani, Mohsen},
  title     = {{MERIT}: Multi-domain Efficient {RAW} Image Translation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37216--37225},
}
FeatureFool: Zero-Query Fooling of Video Models via Feature Map: Duoxun Tang,

Xi Xiao,

Guangwu Hu,

Kangkang Sun,

Xiao Yang,

Dongyang Chen,

Qing Li,

Yong-jie Yin,

Jiyao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Duoxun and Xiao, Xi and Hu, Guangwu and Sun, Kangkang and Yang, Xiao and Chen, Dongyang and Li, Qing and Yin, Yong-jie and Wang, Jiyao},
  title     = {{FeatureFool}: Zero-Query Fooling of Video Models via Feature Map},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42268--42279},
}
A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation: Shuang Hao,

Pengfei Ren,

Haifeng Sun,

Ting Pan,

Qi Qi,

Lei Zhang,

Cong Liu,

Jianxin Liao,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Shuang and Ren, Pengfei and Sun, Haifeng and Pan, Ting and Qi, Qi and Zhang, Lei and Liu, Cong and Liao, Jianxin and Wang, Jingyu},
  title     = {A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38324--38334},
}
StyleGallery: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References: Boyu He,

Yunfan Ye,

Chang Liu,

Weishang Wu,

Fang Liu,

Zhiping Cai; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Boyu and Ye, Yunfan and Liu, Chang and Wu, Weishang and Liu, Fang and Cai, Zhiping},
  title     = {{StyleGallery}: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29092--29102},
}
HOG-Layout: Hierarchical 3D Scene Generation, Optimization and Editing via Vision-Language Models: Haiyan Jiang,

Deyu Zhang,

Dongdong Weng,

Weitao Song,

Henry Been-Lirn Duh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haiyan and Zhang, Deyu and Weng, Dongdong and Song, Weitao and Duh, Henry Been-Lirn},
  title     = {{HOG-Layout}: Hierarchical {3D} Scene Generation, Optimization and Editing via Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31586--31596},
}
SeD-UD: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition: Qin Li,

Wenbo Zhang,

Limei Liu,

Han Peng,

Junfeng Yang,

Guanying Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qin and Zhang, Wenbo and Liu, Limei and Peng, Han and Yang, Junfeng and Xu, Guanying},
  title     = {{SeD-UD}: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30346--30356},
}
FAAR: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection: Maxime Fontana,

Michael Spratling,

Miaojing Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fontana_2026_CVPR,
  author    = {Fontana, Maxime and Spratling, Michael and Shi, Miaojing},
  title     = {{FAAR}: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31135--31144},
}
GeoWorld: Geometric World Models: Zeyu Zhang,

Danning Li,

Ian Reid,

Richard Hartley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zeyu and Li, Danning and Reid, Ian and Hartley, Richard},
  title     = {{GeoWorld}: Geometric World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30952--30963},
}
IFCSR: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution: Jonghee Back,

Jongju Kim,

Jeong-Uk Kim,

Eunjin Kim,

Minyong Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Back_2026_CVPR,
  author    = {Back, Jonghee and Kim, Jongju and Kim, Jeong-Uk and Kim, Eunjin and Jeon, Minyong},
  title     = {{IFCSR}: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38187--38197},
}
4D Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis: Jiaxun Guo,

Wentao Fan,

Manar Amayri,

Nizar Bouguila; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Jiaxun and Fan, Wentao and Amayri, Manar and Bouguila, Nizar},
  title     = {{4D} Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31440--31449},
}
Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with 2D Pretraining: Zhumei Wang,

Zechen Hu,

Ruoxi Guo,

Huaijin Pi,

Ziyong Feng,

Liang Zhang,

Mingtao Pei,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhumei and Hu, Zechen and Guo, Ruoxi and Pi, Huaijin and Feng, Ziyong and Zhang, Liang and Pei, Mingtao and Huang, Siyuan},
  title     = {{Mocap-2-to-3}: Multi-view Lifting for Monocular Motion Recovery with {2D} Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42869--42878},
}
LayoutAD: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection: Zhichao Zeng,

Jiasheng Zhang,

Jiyun Sun,

Jiangtao Cui,

Xiaotian Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Zhichao and Zhang, Jiasheng and Sun, Jiyun and Cui, Jiangtao and Qiao, Xiaotian},
  title     = {{LayoutAD}: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35567--35576},
}
Few-Step Diffusion Sampling Through Instance-Aware Discretizations: Liangyu Yuan,

Ruoyu Wang,

Tong Zhao,

Dingwen Fu,

Mingkun Lei,

Beier Zhu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Liangyu and Wang, Ruoyu and Zhao, Tong and Fu, Dingwen and Lei, Mingkun and Zhu, Beier and Zhang, Chi},
  title     = {Few-Step Diffusion Sampling Through Instance-Aware Discretizations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35882--35892},
}
Revisiting 3D Reconstruction Kernels as Low-Pass Filters: Shengjun Zhang,

Min Chen,

Yibo Wei,

Mingyu Dong,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shengjun and Chen, Min and Wei, Yibo and Dong, Mingyu and Duan, Yueqi},
  title     = {Revisiting {3D} Reconstruction Kernels as Low-Pass Filters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33374--33383},
}
Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection: Soo Won Seo,

KyungChae Lee,

Hyungchan Cho,

Taein Son,

Nam Ik Cho,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Soo Won and Lee, KyungChae and Cho, Hyungchan and Son, Taein and Cho, Nam Ik and Choi, Jun Won},
  title     = {Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40418--40427},
}
SpatialVID: A Large-Scale Video Dataset with Spatial Annotations: Jiahao Wang,

Yufeng Yuan,

Rujie Zheng,

Youtian Lin,

Jian Gao,

Lin-Zhuo Chen,

Yajie Bao,

Chang Zeng,

Yanxi Zhou,

Xiao-Xiao Long,

Hao Zhu,

Zhaoxiang Zhang,

Xun Cao,

Yao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Yuan, Yufeng and Zheng, Rujie and Lin, Youtian and Gao, Jian and Chen, Lin-Zhuo and Bao, Yajie and Zeng, Chang and Zhou, Yanxi and Long, Xiao-Xiao and Zhu, Hao and Zhang, Zhaoxiang and Cao, Xun and Yao, Yao},
  title     = {{SpatialVID}: A Large-Scale Video Dataset with Spatial Annotations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42592--42603},
}
ReasonMap: Towards Fine-Grained Visual Reasoning from Transit Maps: Sicheng Feng,

Song Wang,

Shuyi Ouyang,

Lingdong Kong,

Zikai Song,

Jianke Zhu,

Huan Wang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Sicheng and Wang, Song and Ouyang, Shuyi and Kong, Lingdong and Song, Zikai and Zhu, Jianke and Wang, Huan and Wang, Xinchao},
  title     = {{ReasonMap}: Towards Fine-Grained Visual Reasoning from Transit Maps},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41077--41088},
}
3D sans 3D Scans: Scalable Pre-training from Video-Generated Point Clouds: Ryousuke Yamada,

Kohsuke Ide,

Yoshihiro Fukuhara,

Hirokatsu Kataoka,

Gilles Puy,

Andrei Bursuc,

Yuki M. Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamada_2026_CVPR,
  author    = {Yamada, Ryousuke and Ide, Kohsuke and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Puy, Gilles and Bursuc, Andrei and Asano, Yuki M.},
  title     = {{3D} sans {3D} Scans: Scalable Pre-training from Video-Generated Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39075--39085},
}
DK-DDIL: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging: Yuxi Ma,

Sujie Liu,

Jing Yang,

Jiacheng Wang,

Yiping Chen,

Baptiste Magnier,

Liansheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yuxi and Liu, Sujie and Yang, Jing and Wang, Jiacheng and Chen, Yiping and Magnier, Baptiste and Wang, Liansheng},
  title     = {{DK-DDIL}: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36290--36299},
}
MUST: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality: Kyungwon Kim,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Kyungwon and Hwang, Dosik},
  title     = {{MUST}: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30312--30321},
}
Diff-SemiER: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal: Jiahao Li,

Shiqi Yin,

Zhenxiang Lian,

Jingtao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiahao and Yin, Shiqi and Lian, Zhenxiang and Guo, Jingtao},
  title     = {{Diff-SemiER}: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30813--30822},
}
LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction: Tianye Ding,

Yiming Xie,

Yiqing Liang,

Moitreya Chatterjee,

Pedro Miraldo,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu},
  title     = {{LASER}: Layer-wise Scale Alignment for Training-Free Streaming {4D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36433--36443},
}
Hierarchical Codec Diffusion for Video-to-Speech Generation: Jiaxin Ye,

Gaoxiang Cong,

Chenhui Wang,

Xin-Cheng Wen,

Zhaoyang Li,

Boyuan Cao,

Hongming Shan; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jiaxin and Cong, Gaoxiang and Wang, Chenhui and Wen, Xin-Cheng and Li, Zhaoyang and Cao, Boyuan and Shan, Hongming},
  title     = {Hierarchical Codec Diffusion for Video-to-Speech Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43352--43362},
}
SO-Bench: A Structural Output Evaluation of Multimodal LLM: Di Feng,

Kaixin Ma,

Feng Nan,

Haofeng Chen,

Bohan Zhai,

David Griffiths,

Mingfei Gao,

Zhe Gan,

Eshan Verma,

Yinfei Yang,

Zhifeng Chen,

Afshin Dehghan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Di and Ma, Kaixin and Nan, Feng and Chen, Haofeng and Zhai, Bohan and Griffiths, David and Gao, Mingfei and Gan, Zhe and Verma, Eshan and Yang, Yinfei and Chen, Zhifeng and Dehghan, Afshin},
  title     = {{SO-Bench}: A Structural Output Evaluation of Multimodal {LLM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37694--37704},
}
Real-Time Neural Video Compression with Unified Intra and Inter Coding: Hui Xiang,

Yifan Bian,

Li Li,

Jingran Wu,

Xianguo Zhang,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Hui and Bian, Yifan and Li, Li and Wu, Jingran and Zhang, Xianguo and Liu, Dong},
  title     = {Real-Time Neural Video Compression with Unified Intra and Inter Coding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35217--35226},
}
RebRL: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits: Mu Zhang,

Tianren Ma,

Yunfan Liu,

Kun Hu,

Qixiang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mu and Ma, Tianren and Liu, Yunfan and Hu, Kun and Ye, Qixiang},
  title     = {{RebRL}: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43135--43144},
}
Task-Aware Image Signal Processor for Advanced Visual Perception: Kai Chen,

Jin Xiao,

Leheng Zhang,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Kai and Xiao, Jin and Zhang, Leheng and Shi, Kexuan and Gu, Shuhang},
  title     = {Task-Aware Image Signal Processor for Advanced Visual Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33672--33681},
}
Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning: Beichen Zhang,

Yuhang Zang,

Xiaoyi Dong,

Yuhang Cao,

Haodong Duan,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Beichen and Zang, Yuhang and Dong, Xiaoyi and Cao, Yuhang and Duan, Haodong and Lin, Dahua and Wang, Jiaqi},
  title     = {Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41203--41212},
}
UniLight: A Unified Representation for Lighting: Zitian Zhang,

Iliyan Georgiev,

Michael Fischer,

Yannick Hold-Geoffroy,

Jean-Francois Lalonde,

Valentin Deschaintre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zitian and Georgiev, Iliyan and Fischer, Michael and Hold-Geoffroy, Yannick and Lalonde, Jean-Francois and Deschaintre, Valentin},
  title     = {{UniLight}: A Unified Representation for Lighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29684--29694},
}
MaskDexGrasp: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis: Binghui Zuo,

Lin Zhou,

Haoxuan Xu,

Jianan Yan,

Zhipeng Yu,

Zekai Liu,

Yangang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Binghui and Zhou, Lin and Xu, Haoxuan and Yan, Jianan and Yu, Zhipeng and Liu, Zekai and Wang, Yangang},
  title     = {{MaskDexGrasp}: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29598--29609},
}
Infinity-RoPE: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout: Hidir Yesiltepe,

Tuna Meral,

Adil Kaan Akan,

Kaan Oktay,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yesiltepe_2026_CVPR,
  author    = {Yesiltepe, Hidir and Meral, Tuna and Akan, Adil Kaan and Oktay, Kaan and Yanardag, Pinar},
  title     = {{Infinity-RoPE}: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40256--40265},
}
Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal: Xiaolong Qian,

Qi Jiang,

Lei Sun,

Zongxi Yu,

Kailun Yang,

Peixuan Wu,

Jiacheng Zhou,

Yao Gao,

Yaoguang Ma,

Ming-Hsuan Yang,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Xiaolong and Jiang, Qi and Sun, Lei and Yu, Zongxi and Yang, Kailun and Wu, Peixuan and Zhou, Jiacheng and Gao, Yao and Ma, Yaoguang and Yang, Ming-Hsuan and Wang, Kaiwei},
  title     = {Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33995--34005},
}
UnicEdit-10M: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits: Keming Ye,

Zhipeng Huang,

Canmiao Fu,

Qingyang Liu,

Jiani Cai,

Zheqi Lv,

Chen Li,

Jing LYU,

Zhou Zhao,

Shengyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Keming and Huang, Zhipeng and Fu, Canmiao and Liu, Qingyang and Cai, Jiani and Lv, Zheqi and Li, Chen and LYU, Jing and Zhao, Zhou and Zhang, Shengyu},
  title     = {{UnicEdit-10M}: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37279--37289},
}
U^2Flow: Uncertainty-Aware Unsupervised Optical Flow Estimation: Xunpei Sun,

Wenwei Lin,

Yi Chang,

Gang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xunpei and Lin, Wenwei and Chang, Yi and Chen, Gang},
  title     = {{U{\textasciicircum}2Flow}: Uncertainty-Aware Unsupervised Optical Flow Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28690--28700},
}
Back to Basics: Let Denoising Generative Models Denoise: Tianhong Li,

Kaiming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tianhong and He, Kaiming},
  title     = {Back to Basics: Let Denoising Generative Models Denoise},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36115--36125},
}
PvP: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations: Mingqi Yuan,

Tao Yu,

Haolin Song,

Bo Li,

Xin Jin,

Hua Chen,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Mingqi and Yu, Tao and Song, Haolin and Li, Bo and Jin, Xin and Chen, Hua and Zeng, Wenjun},
  title     = {{PvP}: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42375--42385},
}
When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks: Minhyeok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34546--34554},
}
CoWTracker: Tracking by Warping instead of Correlation: Zihang Lai,

Eldar Insafutdinov,

Edgar Sucar,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Zihang and Insafutdinov, Eldar and Sucar, Edgar and Vedaldi, Andrea},
  title     = {{CoWTracker}: Tracking by Warping instead of Correlation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42571--42580},
}
Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability: Chengzhi Li,

Heyan Huang,

Ping Jian,

Zhen Yang,

Yaning Tian,

Zhongbin Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chengzhi and Huang, Heyan and Jian, Ping and Yang, Zhen and Tian, Yaning and Guo, Zhongbin},
  title     = {Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31812--31821},
}
Rascene: High-Fidelity 3D Scene Imaging with mmWave Communication Signals: Kunzhe Song,

Geo Jie Zhou,

Xiaoming Liu,

Huacheng Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Kunzhe and Zhou, Geo Jie and Liu, Xiaoming and Zeng, Huacheng},
  title     = {Rascene: High-Fidelity {3D} Scene Imaging with {mmWave} Communication Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36454--36463},
}
Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation: Daikun Liu,

Teng Wang,

Changyin Sun; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Daikun and Wang, Teng and Sun, Changyin},
  title     = {Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29504--29513},
}
SDUIE: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control: Xiaofeng Cong,

Yu-Xin Zhang,

Hao Shen,

Yeying Jin,

Junming Hou,

Jie Gui; [pdf] [supp]
[bibtex]
@InProceedings{Cong_2026_CVPR,
  author    = {Cong, Xiaofeng and Zhang, Yu-Xin and Shen, Hao and Jin, Yeying and Hou, Junming and Gui, Jie},
  title     = {{SDUIE}: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37423--37433},
}
From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning: Yang Liu,

Qianqian Xu,

Peisong Wen,

Siran Dai,

Xilin Zhao,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Zhao, Xilin and Huang, Qingming},
  title     = {From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31250--31261},
}
Transition Models: Rethinking the Generative Learning Objective: Zidong Wang,

Yiyuan Zhang,

Xiaoyu Yue,

Xiangyu Yue,

Yangguang Li,

Wanli Ouyang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zidong and Zhang, Yiyuan and Yue, Xiaoyu and Yue, Xiangyu and Li, Yangguang and Ouyang, Wanli and Bai, Lei},
  title     = {Transition Models: Rethinking the Generative Learning Objective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29178--29189},
}
M3DLayout: A Multi-Source Dataset of 3D Indoor Layouts and Structured Descriptions for 3D Generation: Yiheng Zhang,

Zhuojiang Cai,

Mingdao Wang,

Meitong Guo,

Tianxiao Li,

Li Lin,

Yuwang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yiheng and Cai, Zhuojiang and Wang, Mingdao and Guo, Meitong and Li, Tianxiao and Lin, Li and Wang, Yuwang},
  title     = {{M3DLayout}: A Multi-Source Dataset of {3D} Indoor Layouts and Structured Descriptions for {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34217--34226},
}
MM-OVSeg: Multimodal Optical-SAR Fusion for Open-Vocabulary Segmentation in Remote Sensing: Yimin Wei,

Aoran Xiao,

Hongruixuan Chen,

Junshi Xia,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yimin and Xiao, Aoran and Chen, Hongruixuan and Xia, Junshi and Yokoya, Naoto},
  title     = {{MM-OVSeg}: Multimodal Optical-{SAR} Fusion for Open-Vocabulary Segmentation in Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42202--42212},
}
Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy: Shuwei Shao,

Kejin Zhu,

Shixing Ma,

Xinzhe Du,

Baochang Zhang,

Zhe Min; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Shuwei and Zhu, Kejin and Ma, Shixing and Du, Xinzhe and Zhang, Baochang and Min, Zhe},
  title     = {Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34126--34137},
}
GGPT: Geometry-Grounded Point Transformer: Yutong Chen,

Yiming Wang,

Xucong Zhang,

Sergey Prokudin,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yutong and Wang, Yiming and Zhang, Xucong and Prokudin, Sergey and Tang, Siyu},
  title     = {{GGPT}: Geometry-Grounded Point Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28959--28968},
}
Bulk RNA-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics: Hang Shi,

Ruocheng Yang,

Wenjie You,

Zhilin Huang,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hang and Yang, Ruocheng and You, Wenjie and Huang, Zhilin and Zhang, Daoqiang and Shao, Wei},
  title     = {Bulk {RNA-seq} Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41815--41825},
}
GEM-TFL: Bridging Weak and Full Supervision for Forgery Localization through EM-Guided Decomposition and Temporal Refinement: Xiaodong Zhu,

Yuanming Zheng,

Suting Wang,

Junqi Yang,

Yuhong Yang,

Weiping Tu,

Zhongyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xiaodong and Zheng, Yuanming and Wang, Suting and Yang, Junqi and Yang, Yuhong and Tu, Weiping and Wang, Zhongyuan},
  title     = {{GEM-TFL}: Bridging Weak and Full Supervision for Forgery Localization through {EM}-Guided Decomposition and Temporal Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42932--42941},
}
VLM-Loc: Localization in Point Cloud Maps via Vision-Language Models: Shuhao Kang,

Youqi Liao,

Peijie Wang,

Wenlong Liao,

Qilin Zhang,

Benjamin Busam,

Xieyuanli Chen,

Yun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Shuhao and Liao, Youqi and Wang, Peijie and Liao, Wenlong and Zhang, Qilin and Busam, Benjamin and Chen, Xieyuanli and Liu, Yun},
  title     = {{VLM-Loc}: Localization in Point Cloud Maps via Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41365--41375},
}
Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation: Shaolin Wang,

Yuying Li,

Lei Zhong,

Shigang Li,

Jianfeng Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shaolin and Li, Yuying and Zhong, Lei and Li, Shigang and Li, Jianfeng},
  title     = {Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38198--38207},
}
From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature: Kun Yuan,

Min Sun,

Zhen Chen,

Alejandro Lozano,

Xiangteng He,

Shi Li,

Nassir Navab,

Xiaoxiao Sun,

Nicolas Padoy,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Kun and Sun, Min and Chen, Zhen and Lozano, Alejandro and He, Xiangteng and Li, Shi and Navab, Nassir and Sun, Xiaoxiao and Padoy, Nicolas and Yeung-Levy, Serena},
  title     = {From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42649--42658},
}
Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning: Tianci Luo,

Haohao Pan,

Jinpeng Wang,

Niu Lian,

Xinrui Chen,

Bin Chen,

Shu-Tao Xia,

Chun Yuan; [pdf] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Tianci and Pan, Haohao and Wang, Jinpeng and Lian, Niu and Chen, Xinrui and Chen, Bin and Xia, Shu-Tao and Yuan, Chun},
  title     = {Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38774--38783},
}
OralGPT-Omni: A Versatile Dental Multimodal Large Language Model: Jing Hao,

Yuci Liang,

Lizhuo Lin,

Yuxuan Fan,

Wenkai Zhou,

Kaixin Guo,

Zanting Ye,

Yanpeng Sun,

Xinyu Zhang,

Yanqi Yang,

Qiankun Li,

Hao Tang,

James Kit-Hon Tsoi,

Linlin Shen,

Kuo Feng Hung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Jing and Liang, Yuci and Lin, Lizhuo and Fan, Yuxuan and Zhou, Wenkai and Guo, Kaixin and Ye, Zanting and Sun, Yanpeng and Zhang, Xinyu and Yang, Yanqi and Li, Qiankun and Tang, Hao and Tsoi, James Kit-Hon and Shen, Linlin and Hung, Kuo Feng},
  title     = {{OralGPT-Omni}: A Versatile Dental Multimodal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38509--38519},
}
Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration: Jiaqi Han,

Juntong Shi,

Puheng Li,

Haotian Ye,

Qiushan Guo,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiaqi and Shi, Juntong and Li, Puheng and Ye, Haotian and Guo, Qiushan and Ermon, Stefano},
  title     = {Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43320--43330},
}
Progressive Neural Architecture Generation: Caiyang Yu,

Chen Huang,

Yun Liu,

Chenwei Tang,

Wei Ju,

Jiancheng Lv; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Caiyang and Huang, Chen and Liu, Yun and Tang, Chenwei and Ju, Wei and Lv, Jiancheng},
  title     = {Progressive Neural Architecture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34524--34534},
}
EDGS: Eliminating Densification for Efficient Convergence of 3DGS: Dmytro Kotovenko,

Olga Grebenkova,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kotovenko_2026_CVPR,
  author    = {Kotovenko, Dmytro and Grebenkova, Olga and Ommer, Bj{\"o}rn},
  title     = {{EDGS}: Eliminating Densification for Efficient Convergence of {3DGS}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41065--41076},
}
CubeComposer: Spatio-Temporal Autoregressive 4K 360° Video Generation from Perspective Video: Lingen Li,

Guangzhi Wang,

Xiaoyu Li,

Zhaoyang Zhang,

Qi Dou,

Jinwei Gu,

Tianfan Xue,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lingen and Wang, Guangzhi and Li, Xiaoyu and Zhang, Zhaoyang and Dou, Qi and Gu, Jinwei and Xue, Tianfan and Shan, Ying},
  title     = {{CubeComposer}: Spatio-Temporal Autoregressive {4K} {$360^\circ$} Video Generation from Perspective Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32625--32635},
}
Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them: Michael Hubbertz,

Qi Han,

Tobias Meisen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hubbertz_2026_CVPR,
  author    = {Hubbertz, Michael and Han, Qi and Meisen, Tobias},
  title     = {Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39755--39764},
}
TR2M: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast: Beilei Cui,

Yiming Huang,

Long Bai,

Hongliang Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Beilei and Huang, Yiming and Bai, Long and Ren, Hongliang},
  title     = {{TR2M}: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34181--34192},
}
ReManNet: A Riemannian Manifold Network for Monocular 3D Lane Detection: Chengzhi Hong,

Bijun Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Chengzhi and Li, Bijun},
  title     = {{ReManNet}: A {Riemannian} Manifold Network for Monocular {3D} Lane Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33046--33056},
}
VideoAuto-R1: Video Auto Reasoning via Thinking Once, Answering Twice: Shuming Liu,

Mingchen Zhuge,

Changsheng Zhao,

Jun Chen,

Lemeng Wu,

Zechun Liu,

Chenchen Zhu,

Zhipeng Cai,

Chong Zhou,

Haozhe Liu,

Ernie Chang,

Saksham Suri,

Hongyu Xu,

Qi Qian,

Wei Wen,

Balakrishnan Varadarajan,

Zhuang Liu,

Hu Xu,

Florian Bordes,

Raghuraman Krishnamoorthi,

Bernard Ghanem,

Vikas Chandra,

Yunyang Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shuming and Zhuge, Mingchen and Zhao, Changsheng and Chen, Jun and Wu, Lemeng and Liu, Zechun and Zhu, Chenchen and Cai, Zhipeng and Zhou, Chong and Liu, Haozhe and Chang, Ernie and Suri, Saksham and Xu, Hongyu and Qian, Qi and Wen, Wei and Varadarajan, Balakrishnan and Liu, Zhuang and Xu, Hu and Bordes, Florian and Krishnamoorthi, Raghuraman and Ghanem, Bernard and Chandra, Vikas and Xiong, Yunyang},
  title     = {{VideoAuto-R1}: Video Auto Reasoning via Thinking Once, Answering Twice},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32829--32839},
}
Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation: Lilin Zhang,

Yimo Guo,

Yue Li,

Jiancheng Shi,

Xianggen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Lilin and Guo, Yimo and Li, Yue and Shi, Jiancheng and Liu, Xianggen},
  title     = {Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34900--34907},
}
Parallax to Align Them All: An OmniParallax Attention Mechanism for Distributed Multi-View Image Compression: Haotian Zhang,

Feiyue Long,

Yixin Yu,

Jian Xue,

Haocheng Tang,

Tongda Xu,

Zhenning Shi,

Yan Wang,

Siwei Ma,

Jiaqi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haotian and Long, Feiyue and Yu, Yixin and Xue, Jian and Tang, Haocheng and Xu, Tongda and Shi, Zhenning and Wang, Yan and Ma, Siwei and Zhang, Jiaqi},
  title     = {Parallax to Align Them All: An {OmniParallax} Attention Mechanism for Distributed Multi-View Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41322--41331},
}
Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation: Chenxi Zhao,

Chen Zhu,

Xiaokun Feng,

Aiming Hao,

Jiashu Zhu,

Jiachen Lei,

Jiahong Wu,

Xiangxiang Chu,

Jufeng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chenxi and Zhu, Chen and Feng, Xiaokun and Hao, Aiming and Zhu, Jiashu and Lei, Jiachen and Wu, Jiahong and Chu, Xiangxiang and Yang, Jufeng},
  title     = {Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36649--36659},
}
VidPrism: Heterogeneous Mixture of Experts for Image-to-Video Transfer: Rui Lin,

Chuanming Wang,

Huadong Ma; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Rui and Wang, Chuanming and Ma, Huadong},
  title     = {{VidPrism}: Heterogeneous Mixture of Experts for Image-to-Video Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31230--31239},
}
GeoAgent: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics: Modi Jin,

Yiming Zhang,

Boyuan Sun,

Dingwen Zhang,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Modi and Zhang, Yiming and Sun, Boyuan and Zhang, Dingwen and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{GeoAgent}: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41352--41364},
}
ViHOI: Human-Object Interaction Synthesis with Visual Priors: Songjin Cai,

Linjie Zhong,

Ling Guo,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Songjin and Zhong, Linjie and Guo, Ling and Ding, Changxing},
  title     = {{ViHOI}: Human-Object Interaction Synthesis with Visual Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30686--30695},
}
FILTR: Extracting Topological Features from Pretrained 3D Models: Louis Martinez,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Martinez_2026_CVPR,
  author    = {Martinez, Louis and Ovsjanikov, Maks},
  title     = {{FILTR}: Extracting Topological Features from Pretrained {3D} Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36179--36189},
}
MSGNav: Unleashing the Power of Multi-modal 3D Scene Graph for Zero-Shot Embodied Navigation: Xun Huang,

Shijia Zhao,

Yunxiang Wang,

Xin Lu,

Wanfa Zhang,

Rongsheng Qu,

Weixin Li,

Yunhong Wang,

Chenglu Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xun and Zhao, Shijia and Wang, Yunxiang and Lu, Xin and Zhang, Wanfa and Qu, Rongsheng and Li, Weixin and Wang, Yunhong and Wen, Chenglu},
  title     = {{MSGNav}: Unleashing the Power of Multi-modal {3D} Scene Graph for Zero-Shot Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37154--37163},
}
Subspace Alignment for CLIP-based Continual Learning via Canonical Correlation Analysis: Huan Zhang,

Shuyu Dong,

Yujin Zheng,

Dingwen Wang,

Shenghua Fan,

Fan Lyu; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Huan and Dong, Shuyu and Zheng, Yujin and Wang, Dingwen and Fan, Shenghua and Lyu, Fan},
  title     = {Subspace Alignment for {CLIP}-based Continual Learning via Canonical Correlation Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32257--32266},
}
From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching: Feifan Luo,

Hongyang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Feifan and Chen, Hongyang},
  title     = {From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31377--31388},
}
Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving: Zehan Zhang,

Yaoyi Li,

Neng Zhang,

Jia Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zehan and Li, Yaoyi and Zhang, Neng and Cai, Jia},
  title     = {Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39796--39805},
}
TC-Pade: Trajectory-Consistent Pade Approximation for Diffusion Acceleration: Shaoxuan He,

Benlei Cui,

Bukun Huang,

Zhizeng Ye,

Yunyun Sun,

Longtao Huang,

Hui Xue,

Yang Yang,

Haiwen Hong,

Jingqun Tang,

Zhou Zhao; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Shaoxuan and Cui, Benlei and Huang, Bukun and Ye, Zhizeng and Sun, Yunyun and Huang, Longtao and Xue, Hui and Yang, Yang and Hong, Haiwen and Tang, Jingqun and Zhao, Zhou},
  title     = {{TC-Pade}: Trajectory-Consistent {Pade} Approximation for Diffusion Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35768--35778},
}
Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases: Xinqi Lyu,

Yihao Liu,

Dong Wang,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Xinqi and Liu, Yihao and Wang, Dong and Xiao, Bin},
  title     = {Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34961--34970},
}
StreamReady: Learning What to Answer and When in Long Streaming Videos: Shehreen Azad,

Vibhav Vineet,

Yogesh S Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Azad_2026_CVPR,
  author    = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh S},
  title     = {{StreamReady}: Learning What to Answer and When in Long Streaming Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40494--40504},
}
V-Attack: Targeting Disentangled Value Features for Controllable Adversarial Attacks on LVLMs: Sen Nie,

Jie Zhang,

Jianxin Yan,

Shiguang Shan,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Sen and Zhang, Jie and Yan, Jianxin and Shan, Shiguang and Chen, Xilin},
  title     = {{V-Attack}: Targeting Disentangled Value Features for Controllable Adversarial Attacks on {LVLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42257--42267},
}
3D Space as a Scratchpad for Editable Text-to-Image Generation: Oindrila Saha,

Vojtech Krs,

Radomir Mech,

Subhransu Maji,

Matheus Gadelha,

Kevin Blackburn-Matzen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saha_2026_CVPR,
  author    = {Saha, Oindrila and Krs, Vojtech and Mech, Radomir and Maji, Subhransu and Gadelha, Matheus and Blackburn-Matzen, Kevin},
  title     = {{3D} Space as a Scratchpad for Editable Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29233--29243},
}
Confusion-Aware Spectral Regularizer for Long-Tailed Recognition: Ziquan Zhu,

Gaojie Jin,

Hanruo Zhu,

Si-Yuan Lu,

Yunxiao Zhang,

Zeyu Fu,

Ronghui Mu,

Guoqiang Zhang,

Zhao Sun,

Yuhang Xia,

Jiaxing Shang,

Xiang Li,

Lu Liu,

Tianjin Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ziquan and Jin, Gaojie and Zhu, Hanruo and Lu, Si-Yuan and Zhang, Yunxiao and Fu, Zeyu and Mu, Ronghui and Zhang, Guoqiang and Sun, Zhao and Xia, Yuhang and Shang, Jiaxing and Li, Xiang and Liu, Lu and Huang, Tianjin},
  title     = {Confusion-Aware Spectral Regularizer for Long-Tailed Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28712--28722},
}
Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning: Linge Wang,

Yingying Chen,

Bingke Zhu,

Lu Zhou,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Linge and Chen, Yingying and Zhu, Bingke and Zhou, Lu and Wang, Jinqiao},
  title     = {Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32005--32014},
}
PhysInOne: Visual Physics Learning and Reasoning in One Suite: Siyuan Zhou,

Hejun Wang,

Hu Cheng,

Jinxi Li,

Dongsheng Wang,

Junwei Jiang,

Yixiao Jin,

Jiayue Huang,

Shiwei Mao,

Shangjia Liu,

Yafei Yang,

Hongkang Song,

Shenxing Wei,

Zihui Zhang,

Bing Wang,

Zhihua Wang,

Chuhang Zou,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Siyuan and Wang, Hejun and Cheng, Hu and Li, Jinxi and Wang, Dongsheng and Jiang, Junwei and Jin, Yixiao and Huang, Jiayue and Mao, Shiwei and Liu, Shangjia and Yang, Yafei and Song, Hongkang and Wei, Shenxing and Zhang, Zihui and Wang, Bing and Wang, Zhihua and Zou, Chuhang and Yang, Bo},
  title     = {{PhysInOne}: Visual Physics Learning and Reasoning in One Suite},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33131--33142},
}
Omni-Attack: Adversarial Attacks on Open-Ended VQA in Black-Box Multimodal LLMs: Kai Hu,

Weichen Yu,

Li Zhang,

Alexander Robey,

Andy Zou,

Haoqi Hu,

Chengming Xu,

Matt Fredrikson; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Kai and Yu, Weichen and Zhang, Li and Robey, Alexander and Zou, Andy and Hu, Haoqi and Xu, Chengming and Fredrikson, Matt},
  title     = {{Omni-Attack}: Adversarial Attacks on Open-Ended {VQA} in Black-Box Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42341--42351},
}
From Intuition to Investigation: A Tool-Augmented Reasoning MLLM Framework for Generalizable Face Anti-Spoofing: Haoyuan Zhang,

Keyao Wang,

Guosheng Zhang,

Haixiao Yue,

Zhiwen Tan,

Siran Peng,

Tianshuo Zhang,

Xiao Tan,

Kunbin Chen,

Wei He,

Jingdong Wang,

Ajian Liu,

Xiangyu Zhu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haoyuan and Wang, Keyao and Zhang, Guosheng and Yue, Haixiao and Tan, Zhiwen and Peng, Siran and Zhang, Tianshuo and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Liu, Ajian and Zhu, Xiangyu and Lei, Zhen},
  title     = {From Intuition to Investigation: A Tool-Augmented Reasoning {MLLM} Framework for Generalizable Face Anti-Spoofing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40855--40865},
}
Learning 3D Reconstruction with Priors in Test Time: Lei Zhou,

Haoyu Wu,

Akshat Dave,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Lei and Wu, Haoyu and Dave, Akshat and Samaras, Dimitris},
  title     = {Learning {3D} Reconstruction with Priors in Test Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36550--36560},
}
EgoXtreme: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions: Taegyoon Yoon,

Yegyu Han,

Seojin Ji,

Jaewoo Park,

Sojeong Kim,

Taein Kwon,

Hyung-Sin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Taegyoon and Han, Yegyu and Ji, Seojin and Park, Jaewoo and Kim, Sojeong and Kwon, Taein and Kim, Hyung-Sin},
  title     = {{EgoXtreme}: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40696--40706},
}
Hyper-PCN: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling: Linfei Li,

Pei Tan,

Siqi Li,

Changqing Zou,

Yue Gao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Linfei and Tan, Pei and Li, Siqi and Zou, Changqing and Gao, Yue},
  title     = {{Hyper-PCN}: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39121--39130},
}
FISHuman: Fine-grained Single-image 3D Human Reconstruction via Multi-view 4D Remeshing: Hanxi Liu,

Yifang Men,

Zhouhui Lian; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui},
  title     = {{FISHuman}: Fine-grained Single-image {3D} Human Reconstruction via Multi-view {4D} Remeshing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42765--42776},
}
Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation: Zaijing Li,

Bing Hu,

Rui Shao,

Gongwei Chen,

Dongmei Jiang,

Pengwei Xie,

Jianye Hao,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zaijing and Hu, Bing and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Xie, Pengwei and Hao, Jianye and Nie, Liqiang},
  title     = {Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35135--35145},
}
CoRiM: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion: Shihao Zou,

Wei Wei; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Shihao and Wei, Wei},
  title     = {{CoRiM}: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37821--37830},
}
StreamVLO: Streaming Visual-LiDAR Odometry with Cumulative Drift Compensation: Mengmeng Liu,

Jiuming Liu,

Michael Ying Yang,

Chaokang Jiang,

Jiangtao Li,

Yunpeng Zhang,

Hesheng Wang,

Francesco Nex,

Hao Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mengmeng and Liu, Jiuming and Yang, Michael Ying and Jiang, Chaokang and Li, Jiangtao and Zhang, Yunpeng and Wang, Hesheng and Nex, Francesco and Cheng, Hao},
  title     = {{StreamVLO}: Streaming Visual-{LiDAR} Odometry with Cumulative Drift Compensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39086--39097},
}
WhisperNet: A Scalable Solution for Bandwidth-Efficient Collaboration: Gong Chen,

Chaokun Zhang,

Xinyan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Gong and Zhang, Chaokun and Zhao, Xinyan},
  title     = {{WhisperNet}: A Scalable Solution for Bandwidth-Efficient Collaboration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32154--32163},
}
DeX-Portrait: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations: Yuxiang Shi,

Zhe Li,

Yanwen Wang,

Hao Zhu,

Xun Cao,

Ligang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yuxiang and Li, Zhe and Wang, Yanwen and Zhu, Hao and Cao, Xun and Liu, Ligang},
  title     = {{DeX-Portrait}: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40009--40019},
}
MultiModalPFN: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning: Wall Kim,

Chaeyoung Song,

Hanul Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Wall and Song, Chaeyoung and Kim, Hanul},
  title     = {{MultiModalPFN}: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30357--30367},
}
Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering: Bing Cai,

Xiaoli Wang,

Gui-Fu Lu,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Bing and Wang, Xiaoli and Lu, Gui-Fu and Li, Zechao},
  title     = {Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39617--39626},
}
Causal Motion Diffusion Models for Autoregressive Motion Generation: Qing Yu,

Akihisa Watanabe,

Kent Fujiwara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Qing and Watanabe, Akihisa and Fujiwara, Kent},
  title     = {Causal Motion Diffusion Models for Autoregressive Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38366--38375},
}
GeoSemba: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation: Xutao Sun,

Jiarui Li,

Junwen Liu,

Yonggong Ren; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xutao and Li, Jiarui and Liu, Junwen and Ren, Yonggong},
  title     = {{GeoSemba}: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29990--29999},
}
GeCo-SRT: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer: Wenbo Yu,

Wenke Xia,

Weitao Zhang,

Di Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Wenbo and Xia, Wenke and Zhang, Weitao and Hu, Di},
  title     = {{GeCo-SRT}: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42408--42417},
}
Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos: Chia-Hsiang Kao,

Cong Phuoc Huynh,

Chien-Yi Wang,

Noranart Vesdapunt,

Stefan Stojanov,

Bharath Hariharan,

Oleksandr Obiednikov,

Ning Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Kao_2026_CVPR,
  author    = {Kao, Chia-Hsiang and Huynh, Cong Phuoc and Wang, Chien-Yi and Vesdapunt, Noranart and Stojanov, Stefan and Hariharan, Bharath and Obiednikov, Oleksandr and Zhou, Ning},
  title     = {Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42364--42374},
}
Specificity-aware reinforcement learning for fine-grained open-world classification: Samuele Angheben,

Davide Berasi,

Alessandro Conti,

Elisa Ricci,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Angheben_2026_CVPR,
  author    = {Angheben, Samuele and Berasi, Davide and Conti, Alessandro and Ricci, Elisa and Wang, Yiming},
  title     = {Specificity-aware reinforcement learning for fine-grained open-world classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41467--41477},
}
Forensic-Friendly Image Manipulation via Controllable Latent Diffusion: Hanyu Chen,

Haiwei Wu,

Jinyu Tian,

Jianqing LI,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Hanyu and Wu, Haiwei and Tian, Jinyu and LI, Jianqing and Zhou, Jiantao},
  title     = {Forensic-Friendly Image Manipulation via Controllable Latent Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35394--35404},
}
Thinking with Programming Vision: Towards a Unified View for Thinking with Images: Zirun Guo,

Minjie Hong,

Feng Zhang,

Kai Jia,

Tao Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Zirun and Hong, Minjie and Zhang, Feng and Jia, Kai and Jin, Tao},
  title     = {Thinking with Programming Vision: Towards a Unified View for Thinking with Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33467--33476},
}
Fast Reasoning Segmentation for Images and Videos: Yiqing Shen,

Mathias Unberath; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Yiqing and Unberath, Mathias},
  title     = {Fast Reasoning Segmentation for Images and Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34765--34774},
}
Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models: Jiayang Sun,

Pin Wang,

Hongbo Wang,

Xinyue Liu,

Huaibo Huang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jiayang and Wang, Pin and Wang, Hongbo and Liu, Xinyue and Huang, Huaibo and He, Ran},
  title     = {Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43155--43164},
}
MoRGS: Efficient Per-Gaussian Motion Reasoning for Streamable Dynamic 3D Scenes: Wonjoon Lee,

Sungmin Woo,

Donghyeong Kim,

Jungho Lee,

Sangheon Park,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Wonjoon and Woo, Sungmin and Kim, Donghyeong and Lee, Jungho and Park, Sangheon and Lee, Sangyoun},
  title     = {{MoRGS}: Efficient {Per-Gaussian} Motion Reasoning for Streamable Dynamic {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41044--41053},
}
TAR: Token-Aware Refinement for Fine-grained Generalized Category Discovery: Xingyu Yang,

Yu Zhang,

Siya Mi,

Xiu-Shen Wei; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xingyu and Zhang, Yu and Mi, Siya and Wei, Xiu-Shen},
  title     = {{TAR}: Token-Aware Refinement for Fine-grained Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31995--32004},
}
Geometrically-Constrained Agent for Spatial Reasoning: Zeren Chen,

Xiaoya Lu,

Zhijie Zheng,

Pengrui Li,

Lehan He,

Yijin Zhou,

Jing Shao,

Bohan Zhuang,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zeren and Lu, Xiaoya and Zheng, Zhijie and Li, Pengrui and He, Lehan and Zhou, Yijin and Shao, Jing and Zhuang, Bohan and Sheng, Lu},
  title     = {Geometrically-Constrained Agent for Spatial Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38689--38699},
}
More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization: Xiaowen Liu,

Jing Li,

Hongtao Huo,

Haozhe Cao,

Renhua Wang,

Xu Dong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiaowen and Li, Jing and Huo, Hongtao and Cao, Haozhe and Wang, Renhua and Dong, Xu},
  title     = {More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41510--41520},
}
CaricHarmony: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis: Dongyu Wang,

Dar-Yen Chen,

Yi-Zhe Song; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Dongyu and Chen, Dar-Yen and Song, Yi-Zhe},
  title     = {{CaricHarmony}: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36126--36135},
}
PhyCritic: Multimodal Critic Models for Physical AI: Tianyi Xiong,

Shihao Wang,

Guilin Liu,

Yi Dong,

Ming Li,

Heng Huang,

Jan Kautz,

Zhiding Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Tianyi and Wang, Shihao and Liu, Guilin and Dong, Yi and Li, Ming and Huang, Heng and Kautz, Jan and Yu, Zhiding},
  title     = {{PhyCritic}: Multimodal Critic Models for Physical {AI}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36881--36892},
}
Scaling Parallel Sequence Models to Vision Foundation Models: Yitong Jiang,

Collin McCarthy,

Hongjun Wang,

Hanrong Ye,

Qi Dou,

Tianfan Xue,

Jinwei Gu,

Jan Kautz,

Hongxu Yin,

Pavlo Molchanov,

Sifei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yitong and McCarthy, Collin and Wang, Hongjun and Ye, Hanrong and Dou, Qi and Xue, Tianfan and Gu, Jinwei and Kautz, Jan and Yin, Hongxu and Molchanov, Pavlo and Liu, Sifei},
  title     = {Scaling Parallel Sequence Models to Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41332--41341},
}
IPR-1: Interactive Physical Reasoner: Mingyu Zhang,

Lifeng Zhuo,

Tianxi Tan,

Guocan Xie,

Xian Nie,

Yan Li,

Renjie Zhao,

Zizhu He,

Ziyu Wang,

Jiting Cai,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mingyu and Zhuo, Lifeng and Tan, Tianxi and Xie, Guocan and Nie, Xian and Li, Yan and Zhao, Renjie and He, Zizhu and Wang, Ziyu and Cai, Jiting and Li, Yong-Lu},
  title     = {{IPR-1}: Interactive Physical Reasoner},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33415--33425},
}
Making Training-Free Diffusion Segmentors Scale with the Generative Power: Benyuan Meng,

Qianqian Xu,

Zitai Wang,

Xiaochun Cao,

Longtao Huang,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Benyuan and Xu, Qianqian and Wang, Zitai and Cao, Xiaochun and Huang, Longtao and Huang, Qingming},
  title     = {Making Training-Free Diffusion Segmentors Scale with the Generative Power},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35861--35871},
}
GS-CLIP: Zero-shot 3D Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning: Zehao Deng,

An Liu,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Zehao and Liu, An and Wang, Yan},
  title     = {{GS-CLIP}: Zero-shot {3D} Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35587--35596},
}
VGG-T$^3$: Offline Feed-Forward 3D Reconstruction at Scale: Sven Elflein,

Ruilong Li,

Sérgio Agostinho,

Zan Gojcic,

Laura Leal-Taixé,

Qunjie Zhou,

Aljosa Osep; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elflein_2026_CVPR,
  author    = {Elflein, Sven and Li, Ruilong and Agostinho, S{\'e}rgio and Gojcic, Zan and Leal-Taix{\'e}, Laura and Zhou, Qunjie and Osep, Aljosa},
  title     = {{VGG-T$^3$}: Offline Feed-Forward {3D} Reconstruction at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36464--36474},
}
SketchRevive: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal LLMs: Ran Zuo,

Haoxiang Hu,

Chenxi Pei,

Yanxuan Liu,

Wenwen Qiang,

Fang Liu,

Xiaoming Deng,

Cuixia Ma,

Yong-Jin Liu; [pdf]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Ran and Hu, Haoxiang and Pei, Chenxi and Liu, Yanxuan and Qiang, Wenwen and Liu, Fang and Deng, Xiaoming and Ma, Cuixia and Liu, Yong-Jin},
  title     = {{SketchRevive}: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43165--43174},
}
Emergent Extreme-View Geometry in 3D Foundation Models: Yiwen Zhang,

Joseph Tung,

Ruojin Cai,

David Fouhey,

Hadar Averbuch-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yiwen and Tung, Joseph and Cai, Ruojin and Fouhey, David and Averbuch-Elor, Hadar},
  title     = {Emergent Extreme-View Geometry in {3D} Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36411--36421},
}
Rethinking BCE Loss for Multi-Label Image Recognition with Fine-Tuning: Ao Zhou,

Zhiwei Jiang,

Zifeng Cheng,

Cong Wang,

Yafeng Yin,

Shufan Yang,

Qing Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Ao and Jiang, Zhiwei and Cheng, Zifeng and Wang, Cong and Yin, Yafeng and Yang, Shufan and Gu, Qing},
  title     = {Rethinking {BCE} Loss for Multi-Label Image Recognition with Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38784--38793},
}
SE(3)-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation: Sheng Yu,

Di-Hua Zhai,

Yuanqing Xia; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Sheng and Zhai, Di-Hua and Xia, Yuanqing},
  title     = {{SE(3)-Equivariance} with Geometric and Topological Guidance for Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35114--35123},
}
InstructMix2Mix: Consistent Sparse-View Editing Through Multi-View Model Personalization: Daniel Gilo,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gilo_2026_CVPR,
  author    = {Gilo, Daniel and Litany, Or},
  title     = {{InstructMix2Mix}: Consistent Sparse-View Editing Through Multi-View Model Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29000--29011},
}
SEA: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering: Jiho Park,

Sieun Choi,

Jaeyoon Seo,

Minho Sohn,

Yeana Kim,

Jihie Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jiho and Choi, Sieun and Seo, Jaeyoon and Sohn, Minho and Kim, Yeana and Kim, Jihie},
  title     = {{SEA}: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31652--31661},
}
AdaSpark: Adaptive Sparsity for Efficient Long-Video Understanding: Handong Li,

Zikang Liu,

Longteng Guo,

Tongtian Yue,

Yepeng Tang,

Xinxin Zhu,

Chuanyang Zheng,

Ziming Wang,

Zhibin Wang,

Jun Song,

Cheng Yu,

Bo Zheng,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Handong and Liu, Zikang and Guo, Longteng and Yue, Tongtian and Tang, Yepeng and Zhu, Xinxin and Zheng, Chuanyang and Wang, Ziming and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Liu, Jing},
  title     = {{AdaSpark}: Adaptive Sparsity for Efficient Long-Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40548--40558},
}
DreamingComics: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models: Patrick Kwon,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Patrick and Chen, Chen},
  title     = {{DreamingComics}: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36769--36780},
}
DeAR: Fine-Grained VLM Adaptation by Decomposing Attention Head Roles: Yiming Ma,

Hongkun Yang,

Lionel Z. Wang,

Bin Chen,

Weizhi Xian,

Jianzhi Teng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yiming and Yang, Hongkun and Wang, Lionel Z. and Chen, Bin and Xian, Weizhi and Teng, Jianzhi},
  title     = {{DeAR}: Fine-Grained {VLM} Adaptation by Decomposing Attention Head Roles},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31514--31523},
}
ELITE: Efficient Gaussian Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation: Kim Youwang,

Lee Hyoseok,

Park Subin,

Gerard Pons-Moll,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Youwang_2026_CVPR,
  author    = {Youwang, Kim and Hyoseok, Lee and Subin, Park and Pons-Moll, Gerard and Oh, Tae-Hyun},
  title     = {{ELITE}: Efficient {Gaussian} Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40152--40162},
}
ParaUni: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction: Jiangtong Tan,

Lin Liu,

Jie Huang,

Xiaopeng Zhang,

Qi Tian,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Jiangtong and Liu, Lin and Huang, Jie and Zhang, Xiaopeng and Tian, Qi and Zhao, Feng},
  title     = {{ParaUni}: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41836--41846},
}
Decoding 3D Perception via BrainSSD: Synergistic Fusion of EEG Representations from Static and Dynamic Visual Streams: Yincheng Yao,

Enze Shi,

Shu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Yincheng and Shi, Enze and Zhang, Shu},
  title     = {Decoding {3D} Perception via {BrainSSD}: Synergistic Fusion of {EEG} Representations from Static and Dynamic Visual Streams},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42711--42721},
}
Ov3R: Open-Vocabulary Semantic 3D Reconstruction from RGB Videos: Ziren Gong,

Xiaohan Li,

Fabio Tosi,

Jiawei Han,

Stefano Mattoccia,

Jianfei Cai,

Matteo Poggi; [pdf] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Ziren and Li, Xiaohan and Tosi, Fabio and Han, Jiawei and Mattoccia, Stefano and Cai, Jianfei and Poggi, Matteo},
  title     = {{Ov3R}: Open-Vocabulary Semantic {3D} Reconstruction from {RGB} Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34206--34216},
}
GeoRelight: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers: Yuxuan Xue,

Ruofan Liang,

Egor Zakharov,

Timur Bagautdinov,

Chen Cao,

Giljoo Nam,

Shunsuke Saito,

Gerard Pons-Moll,

Javier Romero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Yuxuan and Liang, Ruofan and Zakharov, Egor and Bagautdinov, Timur and Cao, Chen and Nam, Giljoo and Saito, Shunsuke and Pons-Moll, Gerard and Romero, Javier},
  title     = {{GeoRelight}: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29771--29780},
}
MotionCrafter: Dense Geometry and Motion Reconstruction with a 4D VAE: Ruijie Zhu,

Jiahao Lu,

Wenbo Hu,

Xiaoguang Han,

Jianfei Cai,

Ying Shan,

Chuanxia Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ruijie and Lu, Jiahao and Hu, Wenbo and Han, Xiaoguang and Cai, Jianfei and Shan, Ying and Zheng, Chuanxia},
  title     = {{MotionCrafter}: Dense Geometry and Motion Reconstruction with a {4D} {VAE}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40303--40315},
}
cryoSENSE: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein Cryo-EM Image Manifold: Zain Shabeeb,

Daniel Saeedi,

Darin Tsui,

Vida Jamali,

Amirali Aghazadeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shabeeb_2026_CVPR,
  author    = {Shabeeb, Zain and Saeedi, Daniel and Tsui, Darin and Jamali, Vida and Aghazadeh, Amirali},
  title     = {{cryoSENSE}: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein {Cryo-EM} Image Manifold},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34072--34083},
}
4DEquine: Disentangling Motion and Appearance for 4D Equine Reconstruction from Monocular Video: Jin Lyu,

Liang An,

Pujin Cheng,

Yebin Liu,

Xiaoying Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Jin and An, Liang and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying},
  title     = {{4DEquine}: Disentangling Motion and Appearance for {4D} Equine Reconstruction from Monocular Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32496--32506},
}
Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis: Yidan Wang,

Zongheng Wang,

Hongjie Xing,

Chunguo Li,

Xiaoxiao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yidan and Wang, Zongheng and Xing, Hongjie and Li, Chunguo and Liu, Xiaoxiao},
  title     = {Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37864--37873},
}
VectorArk: Learning Practical Image Vectorization with Rounded Polygon Representation: Tarun Gehlaut,

Difan Liu,

Charu Bansal,

Krutik Malani,

Souymodip Chakraborty,

Ankit Phogat,

Matthew Fisher,

Vineet Batra; [pdf] [supp]
[bibtex]
@InProceedings{Gehlaut_2026_CVPR,
  author    = {Gehlaut, Tarun and Liu, Difan and Bansal, Charu and Malani, Krutik and Chakraborty, Souymodip and Phogat, Ankit and Fisher, Matthew and Batra, Vineet},
  title     = {{VectorArk}: Learning Practical Image Vectorization with Rounded Polygon Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31619--31627},
}
DriveCombo: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving: Enhui Ma,

Jiahuan Zhang,

Guantian Zheng,

Tao Tang,

Shengbo Eben Li,

Yuhang Lu,

Xia Zhou,

Xueyang Zhang,

Yifei Zhan,

Kun Zhan,

Zhihui Hao,

Xianpeng Lang,

Kaicheng Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Enhui and Zhang, Jiahuan and Zheng, Guantian and Tang, Tao and Li, Shengbo Eben and Lu, Yuhang and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Hao, Zhihui and Lang, Xianpeng and Yu, Kaicheng},
  title     = {{DriveCombo}: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32113--32123},
}
SAVE: Speech-Aware Video Representation Learning for Video-Text Retrieval: Ruixiang Zhao,

Zhihao Xu,

Bangxiang Lan,

Zijie Xin,

Jingyu Liu,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruixiang and Xu, Zhihao and Lan, Bangxiang and Xin, Zijie and Liu, Jingyu and Li, Xirong},
  title     = {{SAVE}: Speech-Aware Video Representation Learning for Video-Text Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31166--31175},
}
Time Blindness: Why Video-Language Models Can't See What Humans Can?: Ujjwal Upadhyay,

Mukul Ranjan,

Zhiqiang Shen,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Upadhyay_2026_CVPR,
  author    = {Upadhyay, Ujjwal and Ranjan, Mukul and Shen, Zhiqiang and Elhoseiny, Mohamed},
  title     = {Time Blindness: Why Video-Language Models Can't See What Humans Can?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30906--30918},
}
Stepwise Credit Assignment for GRPO on Flow-Matching Models: Yash Savani,

Branislav Kveton,

Yuchen Liu,

Yilin Wang,

Jing Shi,

Subhojyoti Mukherjee,

Nikos Vlassis,

Krishna Kumar Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Savani_2026_CVPR,
  author    = {Savani, Yash and Kveton, Branislav and Liu, Yuchen and Wang, Yilin and Shi, Jing and Mukherjee, Subhojyoti and Vlassis, Nikos and Singh, Krishna Kumar},
  title     = {Stepwise Credit Assignment for {GRPO} on Flow-Matching Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42007--42017},
}
BAgger: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models: Ryan Po,

Eric Ryan Chan,

Changan Chen,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Po_2026_CVPR,
  author    = {Po, Ryan and Chan, Eric Ryan and Chen, Changan and Wetzstein, Gordon},
  title     = {{BAgger}: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43727--43739},
}
Image-to-Point Cloud Feature Back-Projection for Multimodal Training of 3D Semantic Segmentation: Jiawei Han,

Matteo Poggi,

Li Huan,

Changshuo Wang,

Kaiqi Liu,

Wei Li; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiawei and Poggi, Matteo and Huan, Li and Wang, Changshuo and Liu, Kaiqi and Li, Wei},
  title     = {Image-to-Point Cloud Feature Back-Projection for Multimodal Training of {3D} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42049--42060},
}
Improved Mean Flows: On the Challenges of Fastforward Generative Models: Zhengyang Geng,

Yiyang Lu,

Zongze Wu,

Eli Shechtman,

J. Zico Kolter,

Kaiming He; [pdf] [arXiv]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Zhengyang and Lu, Yiyang and Wu, Zongze and Shechtman, Eli and Kolter, J. Zico and He, Kaiming},
  title     = {Improved Mean Flows: On the Challenges of Fastforward Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30467--30476},
}
POLAR: A Portrait OLAT Dataset and Generative Framework for Illumination-Aware Face Modeling: Zhuo Chen,

Chengqun Yang,

Zhuo Su,

Zheng Lv,

Jingnan Gao,

Xiaoyuan Zhang,

Xiaokang Yang,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuo and Yang, Chengqun and Su, Zhuo and Lv, Zheng and Gao, Jingnan and Zhang, Xiaoyuan and Yang, Xiaokang and Yan, Yichao},
  title     = {{POLAR}: A Portrait {OLAT} Dataset and Generative Framework for Illumination-Aware Face Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28871--28881},
}
2D-LFM: Lifting Foundation Model without 3D Supervision: Mosam Dabhi,

Irhas Gill,

László A. Jeni,

Simon Lucey; [pdf] [supp]
[bibtex]
@InProceedings{Dabhi_2026_CVPR,
  author    = {Dabhi, Mosam and Gill, Irhas and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon},
  title     = {{2D-LFM}: Lifting Foundation Model without {3D} Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34303--34311},
}
Bi-Bridge: Bidirectional Diffusion Bridges for Low-Light Image Enhancement: Zeyu Hua,

Hui Li,

Yu Wang,

Song Wang,

Congchao Zhu,

Caixia Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Hua_2026_CVPR,
  author    = {Hua, Zeyu and Li, Hui and Wang, Yu and Wang, Song and Zhu, Congchao and Zheng, Caixia},
  title     = {{Bi-Bridge}: Bidirectional Diffusion Bridges for Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37455--37464},
}
StaMo: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation: Mingyu Liu,

Jiuhe Shu,

Hui Chen,

Zeju Li,

Canyu Zhao,

Jiange Yang,

Shenyuan Gao,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingyu and Shu, Jiuhe and Chen, Hui and Li, Zeju and Zhao, Canyu and Yang, Jiange and Gao, Shenyuan and Chen, Hao and Shen, Chunhua},
  title     = {{StaMo}: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35014--35024},
}
Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in OOD Detection with VLMs: Zhikang Xu,

Qianqian Xu,

Zitai Wang,

Cong Hua,

Sicong Li,

Zhiyong Yang,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhikang and Xu, Qianqian and Wang, Zitai and Hua, Cong and Li, Sicong and Yang, Zhiyong and Huang, Qingming},
  title     = {Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in {OOD} Detection with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34650--34661},
}
DualPrim: Compact 3D Reconstruction with Positive and Negative Primitives: Xiaoxu Meng,

Zhongmin Chen,

Bo Yang,

Weikai Chen,

Weixiao Liu,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Xiaoxu and Chen, Zhongmin and Yang, Bo and Chen, Weikai and Liu, Weixiao and Gao, Lin},
  title     = {{DualPrim}: Compact {3D} Reconstruction with Positive and Negative Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29081--29091},
}
TagSplat: Topology-Aware Gaussian Splatting for Dynamic Mesh Modeling and Tracking: Hanzhi Guo,

Dongdong Weng,

Mo Su,

Yixiao Chen,

Xiaonuo Dongye,

Chenyu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhi and Weng, Dongdong and Su, Mo and Chen, Yixiao and Dongye, Xiaonuo and Xu, Chenyu},
  title     = {{TagSplat}: Topology-Aware {Gaussian} Splatting for Dynamic Mesh Modeling and Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40971--40980},
}
Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization: Juxin Lu,

Haoyu Shi,

Mengyao Wang,

Huaiwen Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juxin and Shi, Haoyu and Wang, Mengyao and Zhang, Huaiwen},
  title     = {Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39403--39413},
}
CURVE: A Benchmark for Cultural and Multilingual Long Video Reasoning: Darshan Singh,

Arsha Nagrani,

Kawshik Manikantan,

Harman Singh,

Dinesh Tewari,

Tobias Weyand,

Cordelia Schmid,

Anelia Angelova,

Shachi Dave; [pdf] [supp]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Darshan and Nagrani, Arsha and Manikantan, Kawshik and Singh, Harman and Tewari, Dinesh and Weyand, Tobias and Schmid, Cordelia and Angelova, Anelia and Dave, Shachi},
  title     = {{CURVE}: A Benchmark for Cultural and Multilingual Long Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32860--32871},
}
Proxy-Tuning: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation: Yi Wu,

Shengju Qian,

Lingting Zhu,

Lei Liu,

Wandi Qiao,

Ziqiang Li,

Lequan Yu,

Bin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yi and Qian, Shengju and Zhu, Lingting and Liu, Lei and Qiao, Wandi and Li, Ziqiang and Yu, Lequan and Li, Bin},
  title     = {{Proxy-Tuning}: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43331--43340},
}
PropFly: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models: Wonyong Seo,

Jaeho Moon,

Jaehyup Lee,

Soo Ye Kim,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Wonyong and Moon, Jaeho and Lee, Jaehyup and Kim, Soo Ye and Kim, Munchurl},
  title     = {{PropFly}: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43228--43238},
}
LocateAnything3D: Vision-Language 3D Detection with Chain-of-Sight: Yunze Man,

Shihao Wang,

Guowen Zhang,

Johan Bjorck,

Liang-Yan Gui,

Jim Fan,

Jan Kautz,

Yu-Xiong Wang,

Zhiding Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2026_CVPR,
  author    = {Man, Yunze and Wang, Shihao and Zhang, Guowen and Bjorck, Johan and Gui, Liang-Yan and Fan, Jim and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding},
  title     = {{LocateAnything3D}: Vision-Language {3D} Detection with Chain-of-Sight},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31089--31102},
}
ViBES: A Conversational Agent with Behaviorally-Intelligent 3D Virtual Body: Juze Zhang,

Changan Chen,

Xin Chen,

Heng Yu,

Tiange Xiang,

Ali Sartaz Khan,

Shrinidhi K. Lakshmikanth,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Juze and Chen, Changan and Chen, Xin and Yu, Heng and Xiang, Tiange and Khan, Ali Sartaz and Lakshmikanth, Shrinidhi K. and Adeli, Ehsan},
  title     = {{ViBES}: A Conversational Agent with Behaviorally-Intelligent {3D} Virtual Body},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39994--40008},
}
Image-based Outlier Synthesis With Training Data: Sudarshan Regmi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Regmi_2026_CVPR,
  author    = {Regmi, Sudarshan},
  title     = {Image-based Outlier Synthesis With Training Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39338--39350},
}
Thermal-Det: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection: Yasiru Ranasinghe,

Elim Schenck,

Florence Yellin,

Shuowen Hu,

Christopher Funk,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranasinghe_2026_CVPR,
  author    = {Ranasinghe, Yasiru and Schenck, Elim and Yellin, Florence and Hu, Shuowen and Funk, Christopher and Patel, Vishal M.},
  title     = {{Thermal-Det}: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34628--34637}
}
DLWM: Dual Latent World Models enable Holistic Gaussian-centric Pre-training in Autonomous Driving: Yiyao Zhu,

Ying Xue,

Haiming Zhang,

Guangfeng Jiang,

Wending Zhou,

Xu Yan,

Jiantao Gao,

Yingjie Cai,

Bingbing Liu,

Zhen Li,

Shaojie Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yiyao and Xue, Ying and Zhang, Haiming and Jiang, Guangfeng and Zhou, Wending and Yan, Xu and Gao, Jiantao and Cai, Yingjie and Liu, Bingbing and Li, Zhen and Shen, Shaojie},
  title     = {{DLWM}: Dual Latent World Models enable Holistic {Gaussian}-centric Pre-training in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39713--39723}
}
Dual-Granularity Memory for Efficient Video Generation: Hongjun Wang,

Lin Liu,

Jianguo Li,

Tao Lin; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hongjun and Liu, Lin and Li, Jianguo and Lin, Tao},
  title     = {Dual-Granularity Memory for Efficient Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38016--38026}
}
VinQA: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document QA: Young Rok Jang,

Hyesoo Kong,

Kyunghwan An,

Jae Sub Huh,

Gyeonghun KIM,

Stanley Jungkyu Choi; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Young Rok and Kong, Hyesoo and An, Kyunghwan and Huh, Jae Sub and KIM, Gyeonghun and Choi, Stanley Jungkyu},
  title     = {{VinQA}: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document {QA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41130--41139}
}
A Bit is All You Need! Efficient Video Capture via Single Bit Imaging: Kanchana Vaishnavi Gandikota,

Michael Moeller,

Andreas Kolb,

Bhaskar Choubey,

Paramanand Chandramouli; [pdf] [supp]
[bibtex]
@InProceedings{Gandikota_2026_CVPR,
  author    = {Gandikota, Kanchana Vaishnavi and Moeller, Michael and Kolb, Andreas and Choubey, Bhaskar and Chandramouli, Paramanand},
  title     = {A Bit is All You Need! {Efficient} Video Capture via Single Bit Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34016--34027}
}
BuildingGPT: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning: Yuzhou Liu,

Lingjie Zhu,

Hanqiao Ye,

Yujun Liu,

Shangfeng Huang,

Xiang Gao,

Ruisheng Wang,

Shuhan Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Liu, Yujun and Huang, Shangfeng and Gao, Xiang and Wang, Ruisheng and Shen, Shuhan},
  title     = {{BuildingGPT}: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36400--36410}
}
Unlocking the Power of Critical Factors for 3D Visual Geometry Estimation: Guangkai Xu,

Hua Geng,

Huanyi Zheng,

Songyi Yin,

Yanlong Sun,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangkai and Geng, Hua and Zheng, Huanyi and Yin, Songyi and Sun, Yanlong and Chen, Hao and Shen, Chunhua},
  title     = {Unlocking the Power of Critical Factors for {3D} Visual Geometry Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28979--28989}
}
No Need For Real Anomaly: MLLM Empowered Zero-Shot Video Anomaly Detection: Zunkai Dai,

Ke Li,

Jiajia Liu,

Jie Yang,

Yuanyuan Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Zunkai and Li, Ke and Liu, Jiajia and Yang, Jie and Qiao, Yuanyuan},
  title     = {No Need For Real Anomaly: {MLLM} Empowered Zero-Shot Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35648--35658}
}
LottieGPT: Tokenizing Vector Animation for Autoregressive Generation: Junhao Chen,

Kejun Gao,

Yuehan Cui,

Mingze Sun,

Mingjin Chen,

Shaohui Wang,

Xiaoxiao Long,

Fei Ma,

Qi Tian,

Hao Zhao,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junhao and Gao, Kejun and Cui, Yuehan and Sun, Mingze and Chen, Mingjin and Wang, Shaohui and Long, Xiaoxiao and Ma, Fei and Tian, Qi and Zhao, Hao and Huang, Ruqi},
  title     = {{LottieGPT}: Tokenizing Vector Animation for Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31639--31651}
}
Matching Every Pair to Track Every Point: PairFormer for All-Pairs Tracking and Video Trajectory Fields: Guangyang Wu,

Youran Ding,

Xinyu Che,

Benyuan Sun,

Yi Yang,

Xiaohong Liu; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guangyang and Ding, Youran and Che, Xinyu and Sun, Benyuan and Yang, Yi and Liu, Xiaohong},
  title     = {Matching Every Pair to Track Every Point: {PairFormer} for All-Pairs Tracking and Video Trajectory Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35187--35196}
}
Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling: Minseok Seo,

Mark Hamilton,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Minseok and Hamilton, Mark and Kim, Changick},
  title     = {Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29707--29716}
}
LESA: Learnable Stage-Aware Predictors for Diffusion Model Acceleration: Peiliang Cai,

Jiacheng Liu,

Haowen Xu,

Xinyu Wang,

Chang Zou,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Peiliang and Liu, Jiacheng and Xu, Haowen and Wang, Xinyu and Zou, Chang and Zhang, Linfeng},
  title     = {{LESA}: Learnable Stage-Aware Predictors for Diffusion Model Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43300--43309}
}
Residual Connections Harm Generative Representation Learning: Xiao Zhang,

Ruoxi Jiang,

William Gao,

Rebecca Willet,

Michael Maire; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiao and Jiang, Ruoxi and Gao, William and Willet, Rebecca and Maire, Michael},
  title     = {Residual Connections Harm Generative Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39669--39679}
}
SANER: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification: Yimin Liu,

Nan Pu,

Fengxiang Yang,

Wenjing Li,

Zhihui Li,

Zhun Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yimin and Pu, Nan and Yang, Fengxiang and Li, Wenjing and Li, Zhihui and Zhong, Zhun},
  title     = {{SANER}: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40376--40385}
}
LangRef3DGS: Natural Language-Guided 3D Referential Segmentation from Partial Observations via 3D Gaussian Splatting: Xulun Ye,

Qin Zhang,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Zhang, Qin and Zhou, Kun},
  title     = {{LangRef3DGS}: Natural Language-Guided {3D} Referential Segmentation from Partial Observations via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38595--38605}
}
Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation: Zihao Wang,

Yuxiang Wei,

Xinpeng Zhou,

Tianyu Zhang,

Tao Liang,

Yalong Bai,

Hongzhi Zhang,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihao and Wei, Yuxiang and Zhou, Xinpeng and Zhang, Tianyu and Liang, Tao and Bai, Yalong and Zhang, Hongzhi and Zuo, Wangmeng},
  title     = {Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29146--29156}
}
Computer Vision with a Superpixelation Camera: Sasidharan Mahalingam,

Rachel Brown,

Atul Ingle; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mahalingam_2026_CVPR,
  author    = {Mahalingam, Sasidharan and Brown, Rachel and Ingle, Atul},
  title     = {Computer Vision with a Superpixelation Camera},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41773--41783}
}
Elastic3D: Controllable Stereo Video Conversion with Guided Latent Decoding: Nando Metzger,

Prune Truong,

Goutam Bhat,

Konrad Schindler,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Metzger_2026_CVPR,
  author    = {Metzger, Nando and Truong, Prune and Bhat, Goutam and Schindler, Konrad and Tombari, Federico},
  title     = {{Elastic3D}: Controllable Stereo Video Conversion with Guided Latent Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32693--32703}
}
DuetMerging: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging: Yan Li,

Guiping Cao,

Yaguang Song,

Ming Tao,

Haoran Gong,

Junhui Liu,

Yaowei Wang,

Dongmei Jiang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Cao, Guiping and Song, Yaguang and Tao, Ming and Gong, Haoran and Liu, Junhui and Wang, Yaowei and Jiang, Dongmei},
  title     = {{DuetMerging}: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41954--41963}
}
Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with DPCache: Bowen Cui,

Yuanbin Wang,

Huajiang Xu,

Biaolong Chen,

Aixi Zhang,

Hao Jiang,

Zhengzheng Jin,

Xu Liu,

Pipei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Bowen and Wang, Yuanbin and Xu, Huajiang and Chen, Biaolong and Zhang, Aixi and Jiang, Hao and Jin, Zhengzheng and Liu, Xu and Huang, Pipei},
  title     = {Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with {DPCache}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43632--43642}
}
GPFlow: Gaussian Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection: Yiting Li,

Xulei Yang,

Jingyi Liao,

Jing Zhang,

Fayao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiting and Yang, Xulei and Liao, Jingyi and Zhang, Jing and Liu, Fayao},
  title     = {{GPFlow}: {Gaussian} Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43103--43112}
}
MVLM: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking: Dae-Hyeon Park,

Mina Baek,

Jeong-Hun Ha,

Chan-Seop Park,

Jamshidjon Ganiev,

Seung-Hwan Bae; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Dae-Hyeon and Baek, Mina and Ha, Jeong-Hun and Park, Chan-Seop and Ganiev, Jamshidjon and Bae, Seung-Hwan},
  title     = {{MVLM}: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35156--35165}
}
Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens: Zeyuan Yang,

Xueyang Yu,

Delin Chen,

Maohao Shen,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zeyuan and Yu, Xueyang and Chen, Delin and Shen, Maohao and Gan, Chuang},
  title     = {Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33510--33520}
}
Flow Map Distillation Without Data: Shangyuan Tong,

Nanye Ma,

Saining Xie,

Tommi Jaakkola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Shangyuan and Ma, Nanye and Xie, Saining and Jaakkola, Tommi},
  title     = {Flow Map Distillation Without Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33973--33984}
}
Unified Vector Floorplan Generation via Markup Representation: Kaede Shiohara,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shiohara_2026_CVPR,
  author    = {Shiohara, Kaede and Yamasaki, Toshihiko},
  title     = {Unified Vector Floorplan Generation via Markup Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39262--39271}
}
Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation: Yilong Yang,

Jianxin Tian,

Shengchuan Zhang,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yilong and Tian, Jianxin and Zhang, Shengchuan and Cao, Liujuan},
  title     = {Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34745--34754}
}
ForeAct: Steering Your VLA with Efficient Visual Foresight Planning: Zhuoyang Zhang,

Shang Yang,

Qinghao Hu,

Luke J. Huang,

James Hou,

Yufei Sun,

Yao Lu,

Song Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhuoyang and Yang, Shang and Hu, Qinghao and Huang, Luke J. and Hou, James and Sun, Yufei and Lu, Yao and Han, Song},
  title     = {{ForeAct}: Steering Your {VLA} with Efficient Visual Foresight Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37195--37205}
}
RAPID: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion: Shangran Lin,

Lu Lu,

Jian Chen,

Qiang Liu; [pdf]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Shangran and Lu, Lu and Chen, Jian and Liu, Qiang},
  title     = {{RAPID}: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36147--36156}
}
Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation: Kwanyoung Lee,

SeungJu Cha,

Yebin Ahn,

Hyunwoo Oh,

Sungho Koh,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kwanyoung and Cha, SeungJu and Ahn, Yebin and Oh, Hyunwoo and Koh, Sungho and Kim, Dong-Jin},
  title     = {Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43707--43716}
}
Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution: Tianyi Zhang,

Zheng-Peng Duan,

Chun-Le Guo,

Peng-Tao Jiang,

Bo Li,

Ming-Ming Cheng,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tianyi and Duan, Zheng-Peng and Guo, Chun-Le and Jiang, Peng-Tao and Li, Bo and Cheng, Ming-Ming and Li, Chongyi},
  title     = {Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30543--30552}
}
Medic-AD: Towards Medical Vision-Language Model's Clinical Intelligence: Woohyeon Park,

Jaeik Kim,

Sunghwan Steve Cho,

Pa Hong,

Wookyoung Jeong,

Yoojin Nam,

Namjoon Kim,

Ginny Y. Wong,

Ka Chun Cheung,

Jaeyoung Do; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Woohyeon and Kim, Jaeik and Cho, Sunghwan Steve and Hong, Pa and Jeong, Wookyoung and Nam, Yoojin and Kim, Namjoon and Wong, Ginny Y. and Cheung, Ka Chun and Do, Jaeyoung},
  title     = {{Medic-AD}: Towards Medical Vision-Language Model's Clinical Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36321--36331}
}
RAM: Recover Any 3D Human Motion in-the-Wild: Sen Jia,

Ning Zhu,

Jinqin Zhong,

Jiale Zhou,

Huaping Zhang,

Jenq-Neng Hwang,

Lei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Sen and Zhu, Ning and Zhong, Jinqin and Zhou, Jiale and Zhang, Huaping and Hwang, Jenq-Neng and Li, Lei},
  title     = {{RAM}: Recover Any {3D} Human Motion in-the-Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42789--42799}
}
UniFusion: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation: Xingyuan Li,

Songcheng Du,

Yang Zou,

Haoyuan Xu,

Zhiying Jiang,

Jinyuan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xingyuan and Du, Songcheng and Zou, Yang and Xu, Haoyuan and Jiang, Zhiying and Liu, Jinyuan},
  title     = {{UniFusion}: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33869--33880}
}
Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers: Wenhao Sun,

Ji Li,

Zhaoqiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Wenhao and Li, Ji and Liu, Zhaoqiang},
  title     = {Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40643--40652}
}
Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control: Minghao Han,

Yichen Liu,

Yizhou Liu,

Zizhi Chen,

Jingqun Tang,

Xuecheng Wu,

Dingkang Yang,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Minghao and Liu, Yichen and Liu, Yizhou and Chen, Zizhi and Tang, Jingqun and Wu, Xuecheng and Yang, Dingkang and Zhang, Lihua},
  title     = {Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42744--42754}
}
GeoMMBench and GeoMMAgent: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing: Aoran Xiao,

Shihao Cheng,

Yonghao Xu,

Yexian Ren,

Hongruixuan Chen,

Naoto Yokoya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Aoran and Cheng, Shihao and Xu, Yonghao and Ren, Yexian and Chen, Hongruixuan and Yokoya, Naoto},
  title     = {{GeoMMBench} and {GeoMMAgent}: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34843--34853}
}
DBMSolver: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation: Sankarshana Venugopal,

Mohammad Mostafavi,

Jonghyun Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Venugopal_2026_CVPR,
  author    = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun},
  title     = {{DBMSolver}: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36062--36071}
}
Mixture of Style Experts for Diverse Image Stylization: Shihao Zhu,

Ziheng Ouyang,

Yijia Kang,

Qilong Wang,

Mi Zhou,

Bo Li,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Shihao and Ouyang, Ziheng and Kang, Yijia and Wang, Qilong and Zhou, Mi and Li, Bo and Cheng, Ming-Ming and Hou, Qibin},
  title     = {Mixture of Style Experts for Diverse Image Stylization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30500--30510}
}
Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex: Alexandru Brateanu,

Tingting Mu,

Codruta O. Ancuti,

Cosmin Ancuti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brateanu_2026_CVPR,
  author    = {Brateanu, Alexandru and Mu, Tingting and Ancuti, Codruta O. and Ancuti, Cosmin},
  title     = {Multinex: Lightweight Low-light Image Enhancement via Multi-prior {Retinex}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29887--29896}
}
BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment: Kanglei Zhou,

Chang Li,

Qingyi Pan,

Liyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Kanglei and Li, Chang and Pan, Qingyi and Wang, Liyuan},
  title     = {{BriMA}: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38904--38914}
}
Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection: Feng Ding,

Wenhui Yi,

Yunpeng Zhou,

Xinan He,

Hong Rao,

Shu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Feng and Yi, Wenhui and Zhou, Yunpeng and He, Xinan and Rao, Hong and Hu, Shu},
  title     = {Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32704--32713}
}
BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment: Risa Shinoda,

Kaede Shiohara,

Nakamasa Inoue,

Kuniaki Saito,

Hiroaki Santo,

Fumio Okura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinoda_2026_CVPR,
  author    = {Shinoda, Risa and Shiohara, Kaede and Inoue, Nakamasa and Saito, Kuniaki and Santo, Hiroaki and Okura, Fumio},
  title     = {{BioVITA}: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29336--29346}
}
Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation: Duc Vu,

Anh Nguyen,

Chi Tran,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vu_2026_CVPR,
  author    = {Vu, Duc and Nguyen, Anh and Tran, Chi and Tran, Anh},
  title     = {{Anti-I2V}: Safeguarding your Photos from Malicious Image-to-video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37621--37631}
}
HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation: Thinh Nguyen,

Trung Phan,

Binh Nguyen,

Khoa D Doan,

Kok-Seng Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Thinh and Phan, Trung and Nguyen, Binh and Doan, Khoa D and Wong, Kok-Seng},
  title     = {{HFedATM}: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39435--39444}
}
Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism: Tao Chen,

Kun Zhang,

Qiong Wu,

Xiao Chen,

Chao Chang,

Xiaoshuai Sun,

Yiyi Zhou,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tao and Zhang, Kun and Wu, Qiong and Chen, Xiao and Chang, Chao and Sun, Xiaoshuai and Zhou, Yiyi and Ji, Rongrong},
  title     = {Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31877--31888}
}
Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation: Siddhant Gole,

Akash Pal,

Amit More,

S Divakar Bhat,

Subhasis Chaudhuri,

Biplab Banerjee; [pdf] [supp]
[bibtex]
@InProceedings{Gole_2026_CVPR,
  author    = {Gole, Siddhant and Pal, Akash and More, Amit and Bhat, S Divakar and Chaudhuri, Subhasis and Banerjee, Biplab},
  title     = {Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34694--34703}
}
Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning: Yuto Shibata,

Kashu Yamazaki,

Lalit Jayanti,

Yoshimitsu Aoki,

Mariko Isogawa,

Katerina Fragkiadaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shibata_2026_CVPR,
  author    = {Shibata, Yuto and Yamazaki, Kashu and Jayanti, Lalit and Aoki, Yoshimitsu and Isogawa, Mariko and Fragkiadaki, Katerina},
  title     = {Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38345--38354}
}
Photo-Guided Tooth Segmentation on 3D Oral Scan Model: Shaojie Zhuang,

Guangshun Wei,

Jiangxin He,

Yuanfeng Zhou; [pdf]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Shaojie and Wei, Guangshun and He, Jiangxin and Zhou, Yuanfeng},
  title     = {Photo-Guided Tooth Segmentation on {3D} Oral Scan Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37558--37567}
}
Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning: Sixian Zhang,

Yiyao Wang,

Xinhang Song,

Keming Zhang,

Zijian Xu,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Sixian and Wang, Yiyao and Song, Xinhang and Zhang, Keming and Xu, Zijian and Jiang, Shuqiang},
  title     = {Multi-Scale {Gaussian}-Language Map for Zero-shot Embodied Navigation and Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37086--37097}
}
Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation: Xuesong Liu,

Anke Xu,

Wenbo Cao,

Emmett Ientilucci; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xuesong and Xu, Anke and Cao, Wenbo and Ientilucci, Emmett},
  title     = {Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34775--34783}
}
Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval: Dongsheng Li,

Xinyuan Guo,

Huijie Zhang,

Pingting Hao,

Qiushi Xia; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Dongsheng and Guo, Xinyuan and Zhang, Huijie and Hao, Pingting and Xia, Qiushi},
  title     = {Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31103--31112}
}
SpaceTools: Tool-Augmented Spatial Reasoning via Double Interactive RL: Siyi Chen,

Mikaela Angelina Uy,

Chan Hee Song,

Faisal Ladhak,

Adithyavairavan Murali,

Qing Qu,

Stan Birchfield,

Valts Blukis,

Jonathan Tremblay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Siyi and Uy, Mikaela Angelina and Song, Chan Hee and Ladhak, Faisal and Murali, Adithyavairavan and Qu, Qing and Birchfield, Stan and Blukis, Valts and Tremblay, Jonathan},
  title     = {{SpaceTools}: Tool-Augmented Spatial Reasoning via Double Interactive {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37109--37120}
}
VLM-Pruner: Buffering for Spatial Sparsity in an Efficient VLM Centrifugal Token Pruning Paradigm: Zhenkai Wu,

Xiaowen Ma,

Zhenliang Ni,

Dengming Zhang,

Han Shu,

Xin Jiang,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhenkai and Ma, Xiaowen and Ni, Zhenliang and Zhang, Dengming and Shu, Han and Jiang, Xin and Chen, Xinghao},
  title     = {{VLM-Pruner}: Buffering for Spatial Sparsity in an Efficient {VLM} Centrifugal Token Pruning Paradigm},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31952--31961}
}
Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning: Zhi-Hao Guan,

Longfei Huang,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Zhi-Hao and Huang, Longfei and Yang, Yang},
  title     = {Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37896--37905}
}
Understanding and Enforcing Weight Disentanglement in Task Arithmetic: Shangge Liu,

Yuehan Yin,

Lei Wang,

Qi Fan,

Yinghuan Shi,

Wenbin Li,

Yang Gao,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Shangge and Yin, Yuehan and Wang, Lei and Fan, Qi and Shi, Yinghuan and Li, Wenbin and Gao, Yang and Tao, Dacheng},
    title     = {Understanding and Enforcing Weight Disentanglement in Task Arithmetic},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28744--28753}
}
HAD: Hallucination-Aware Diffusion Priors for 3D Reconstruction: Xi Liu,

Weiwei Sun,

Zhou Ren,

Chris Broaddus,

Siyu Huang,

Laurent Guigues; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Xi and Sun, Weiwei and Ren, Zhou and Broaddus, Chris and Huang, Siyu and Guigues, Laurent},
    title     = {{HAD}: Hallucination-Aware Diffusion Priors for {3D} Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29781--29791}
}
Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach: Yifan Liao,

Yuxin Cao,

Yedi Zhang,

Wentao He,

Yan Xiao,

Xianglong Du,

Zhiyong Huang,

Jin Song Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
    author    = {Liao, Yifan and Cao, Yuxin and Zhang, Yedi and He, Wentao and Xiao, Yan and Du, Xianglong and Huang, Zhiyong and Dong, Jin Song},
    title     = {Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34950--34960}
}
Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding: Wenqi Jia,

Ruifan Li,

Pengyue Lin,

Fangxiang Feng,

Zhanyu Ma,

Xiaojie Wang; [pdf]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Wenqi and Li, Ruifan and Lin, Pengyue and Feng, Fangxiang and Ma, Zhanyu and Wang, Xiaojie},
    title     = {Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31822--31832}
}
Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents: Seohui Bae,

Jeonghye Kim,

Youngchul Sung,

Woohyung Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2026_CVPR,
    author    = {Bae, Seohui and Kim, Jeonghye and Sung, Youngchul and Lim, Woohyung},
    title     = {Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29642--29651}
}
VKG-QA: Visual Knowledge Graph-based Question Answer for Large Multimodal Models: Yuntao Du,

Yiming Wang,

Renshuo Yuan,

Jincheng Yue,

Yijing Chen,

Yue Fan,

Bo Zhang,

Qian Li,

Lizhen Cui; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
    author    = {Du, Yuntao and Wang, Yiming and Yuan, Renshuo and Yue, Jincheng and Chen, Yijing and Fan, Yue and Zhang, Bo and Li, Qian and Cui, Lizhen},
    title     = {{VKG-QA}: Visual Knowledge Graph-based Question Answer for Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41213--41223}
}
AdaCluster: Adaptive Query-Key Clustering for Sparse Attention in Video Generation: Haoyue Tan,

Shengnan Wang,

Yulin Qiao,

Juncheng Zhang,

Youhui Bai,

Ping Gong,

Zewen Jin,

Cheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Haoyue and Wang, Shengnan and Qiao, Yulin and Zhang, Juncheng and Bai, Youhui and Gong, Ping and Jin, Zewen and Li, Cheng},
    title     = {{AdaCluster}: Adaptive Query-Key Clustering for Sparse Attention in Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43249--43259}
}
LOREAL: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation: Xucong Wang,

Pengkun Wang,

Zhe Zhao,

Liheng Yu,

Rui Mao,

Yang Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Mao, Rui and Wang, Yang},
    title     = {{LOREAL}: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39152--39163}
}
Cov2Pose: Leveraging Spatial Covariance for Direct Manifold-aware 6-DoF Object Pose Estimation: Nassim Ali Ousalah,

Peyman Rostami,

Vincent Gaudillière,

Emmanuel Koumandakis,

Anis Kacem,

Enjie Ghorbel,

Djamila Aouada; [pdf] [supp]
[bibtex]
@InProceedings{Ousalah_2026_CVPR,
    author    = {Ousalah, Nassim Ali and Rostami, Peyman and Gaudilli{\`e}re, Vincent and Koumandakis, Emmanuel and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila},
    title     = {{Cov2Pose}: Leveraging Spatial Covariance for Direct Manifold-aware {6-DoF} Object Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40727--40738}
}
DriveLaW: Unifying Planning and Video Generation in a Latent Driving World: Tianze Xia,

Yongkang Li,

Lijun Zhou,

Jingfeng Yao,

Kaixin Xiong,

Haiyang Sun,

Bing Wang,

Kun Ma,

Guang Chen,

Hangjun Ye,

Wenyu Liu,

Xinggang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Tianze and Li, Yongkang and Zhou, Lijun and Yao, Jingfeng and Xiong, Kaixin and Sun, Haiyang and Wang, Bing and Ma, Kun and Chen, Guang and Ye, Hangjun and Liu, Wenyu and Wang, Xinggang},
    title     = {{DriveLaW}: Unifying Planning and Video Generation in a Latent Driving World},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39701--39712}
}
OrthoFuse: Training-free Riemannian Fusion of Orthogonal Style-Concept Adapters for Diffusion Models: Ali Aliev,

Kamil Garifullin,

Nikolay Yudin,

Vera Soboleva,

Alexander Molozhavenko,

Ivan Oseledets,

Aibek Alanov,

Maxim Rakhuba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aliev_2026_CVPR,
    author    = {Aliev, Ali and Garifullin, Kamil and Yudin, Nikolay and Soboleva, Vera and Molozhavenko, Alexander and Oseledets, Ivan and Alanov, Aibek and Rakhuba, Maxim},
    title     = {{OrthoFuse}: Training-free {Riemannian} Fusion of Orthogonal Style-Concept Adapters for Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36009--36018}
}
Generative Neural Video Compression via Video Diffusion Prior: Qi Mao,

Hao Cheng,

Tinghan Yang,

Libiao Jin,

Siwei Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Qi and Cheng, Hao and Yang, Tinghan and Jin, Libiao and Ma, Siwei},
    title     = {Generative Neural Video Compression via Video Diffusion Prior},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43239--43248}
}
Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models: Ji Ma,

Wei Suo,

Peng Wang,

Yanning Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Ji and Suo, Wei and Wang, Peng and Zhang, Yanning},
    title     = {Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40224--40234}
}
BlackMirror: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation: Feiran Li,

Qianqian Xu,

Shilong Bao,

Zhiyong Yang,

Xilin Zhao,

Xiaochun Cao,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Feiran and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Zhao, Xilin and Cao, Xiaochun and Huang, Qingming},
    title     = {{BlackMirror}: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30098--30109}
}
Taming Video Models for 3D and 4D Generation via Zero-Shot Camera Control: Chenxi Song,

Yanming Yang,

Tong Zhao,

Ruibo Li,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Chenxi and Yang, Yanming and Zhao, Tong and Li, Ruibo and Zhang, Chi},
    title     = {Taming Video Models for {3D} and {4D} Generation via Zero-Shot Camera Control},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40352--40363}
}
Learning Eigenstructures of Unstructured Data Manifolds: Roy Velich,

Arkadi Piven,

David Bensaïd,

Daniel Cremers,

Thomas Dagès,

Ron Kimmel; [pdf] [supp]
[bibtex]
@InProceedings{Velich_2026_CVPR,
    author    = {Velich, Roy and Piven, Arkadi and Bensa{\"\i}d, David and Cremers, Daniel and Dag{\`e}s, Thomas and Kimmel, Ron},
    title     = {Learning Eigenstructures of Unstructured Data Manifolds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36201--36214}
}
Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework: Kaihua Tang,

Jiaxin Qi,

Jinli Ou,

Yuhua Zheng,

Jianqiang Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Kaihua and Qi, Jiaxin and Ou, Jinli and Zheng, Yuhua and Huang, Jianqiang},
    title     = {Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39361--39371}
}
Spectral Mixture-of-Experts for Continual Learning: Chen Yin,

Xingbo Dong,

Xuelin Shen,

Zhe Jin; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Chen and Dong, Xingbo and Shen, Xuelin and Jin, Zhe},
    title     = {Spectral Mixture-of-Experts for Continual Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39972--39982}
}
Learning Convex Decomposition via Feature Fields: Yuezhi Yang,

Qixing Huang,

Mikaela Angelina Uy,

Nicholas Sharp; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuezhi and Huang, Qixing and Uy, Mikaela Angelina and Sharp, Nicholas},
    title     = {Learning Convex Decomposition via Feature Fields},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36190--36200}
}
M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction: Junqiao Fan,

Yunjiao Zhou,

Yizhuo Yang,

Xinyuan Cui,

Jiarui Zhang,

Lihua Xie,

Jianfei Yang,

Chris Xiaoxuan Lu,

Fangqiang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Junqiao and Zhou, Yunjiao and Yang, Yizhuo and Cui, Xinyuan and Zhang, Jiarui and Xie, Lihua and Yang, Jianfei and Lu, Chris Xiaoxuan and Ding, Fangqiang},
    title     = {{M4Human}: A Large-Scale Multimodal {mmWave} Radar Benchmark for Human Mesh Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42836--42846}
}
ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders: Junsik Kim,

Gun Bang,

Soowoong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Junsik and Bang, Gun and Kim, Soowoong},
    title     = {{ELiC}: Efficient {LiDAR} Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39011--39020}
}
PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding: Junpeng Shang,

Feifei Shao,

Jun Xiao,

Lin Li,

Hongwei Wang,

Dongfang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2026_CVPR,
    author    = {Shang, Junpeng and Shao, Feifei and Xiao, Jun and Li, Lin and Wang, Hongwei and Ma, Dongfang},
    title     = {{PV-Ground}: Text-Guided Point-Voxel Interaction for {3D} Visual Grounding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38657--38667}
}
Learning Differentiable Hierarchies in 3D Gaussian Splatting: Youqi Pan,

Wugen Zhou,

Hongbin Zha; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Youqi and Zhou, Wugen and Zha, Hongbin},
    title     = {Learning Differentiable Hierarchies in {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40939--40948}
}
Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models: Katarzyna Zaleska,

Łukasz Popek,

Monika Wysoczańska,

Kamil Deja; [pdf] [supp]
[bibtex]
@InProceedings{Zaleska_2026_CVPR,
    author    = {Zaleska, Katarzyna and Popek, {\L}ukasz and Wysocza{\'n}ska, Monika and Deja, Kamil},
    title     = {Attention, May I Have Your Decision? {Localizing} Generative Choices in Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30854--30863}
}
Lenses: Toward Polysemous Vision-Language Understanding: Hani Alomari,

Ali Asgarov,

Chris Thomas; [pdf] [supp]
[bibtex]
@InProceedings{Alomari_2026_CVPR,
    author    = {Alomari, Hani and Asgarov, Ali and Thomas, Chris},
    title     = {Lenses: Toward Polysemous Vision-Language Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37810--37820}
}
Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images: Xiangyu Sun,

Haoyi Jiang,

Liu Liu,

Seungtae Nam,

Gyeongjin Kang,

Xinjie Wang,

Wei Sui,

Zhizhong Su,

Wenyu Liu,

Xinggang Wang,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Xiangyu and Jiang, Haoyi and Liu, Liu and Nam, Seungtae and Kang, Gyeongjin and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang and Park, Eunbyung},
    title     = {{Uni3R}: Unified {3D} Reconstruction and Semantic Understanding via Generalizable {Gaussian} Splatting from Unposed Multi-View Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33280--33290}
}
Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop: Zejian Li,

Jiarui Ma,

Han Xu,

Weiting Zheng,

Yangrui Zhu,

Chenye Meng,

Pei Chen,

Ling Yang,

Zhiyuan Yang,

Changyuan Yang,

Guang Yang,

Immanuel Koh,

Lingyun Sun; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Zejian and Ma, Jiarui and Xu, Han and Zheng, Weiting and Zhu, Yangrui and Meng, Chenye and Chen, Pei and Yang, Ling and Yang, Zhiyuan and Yang, Changyuan and Yang, Guang and Koh, Immanuel and Sun, Lingyun},
    title     = {{Circular-DPO}: Aligning Multi-Stage {3D} Generative Models via Preference Feedback Loop},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32592--32601}
}
Align Images Before You Generate: Shihua Zhang,

Qiuhong Shen,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Shihua and Shen, Qiuhong and Wang, Xinchao},
    title     = {Align Images Before You Generate},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30521--30531}
}
FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation: Jing Zuo,

Lingzhou Mu,

Fan Jiang,

Chengcheng Ma,

Mu Xu,

Yonggang Qi; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
    author    = {Zuo, Jing and Mu, Lingzhou and Jiang, Fan and Ma, Chengcheng and Xu, Mu and Qi, Yonggang},
    title     = {{FantasyVLN}: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29674--29683}
}
Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection: Tianxiao Li,

Zhenglin Huang,

Haiquan Wen,

Yiwei He,

Xinze Li,

Bingyu Zhu,

Wuhui Duan,

Congang Chen,

Zeyu Fu,

Yi Dong,

Baoyuan Wu,

Xiangtai Li,

Guangliang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Tianxiao and Huang, Zhenglin and Wen, Haiquan and He, Yiwei and Li, Xinze and Zhu, Bingyu and Duan, Wuhui and Chen, Congang and Fu, Zeyu and Dong, Yi and Wu, Baoyuan and Li, Xiangtai and Cheng, Guangliang},
    title     = {{Omni-Fake}: Benchmarking Unified Multimodal Social Media Deepfake Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30299--30311}
}
ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding: Hao Lu,

Jiahao Wang,

Yaolun Zhang,

Ruohui Wang,

Xuanyu Zheng,

Yepeng Tang,

Dahua Lin,

Lewei Lu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Hao and Wang, Jiahao and Zhang, Yaolun and Wang, Ruohui and Zheng, Xuanyu and Tang, Yepeng and Lin, Dahua and Lu, Lewei},
    title     = {{ELV-Halluc}: Benchmarking Semantic Aggregation Hallucinations in Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32572--32581}
}
AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts: Zijian Zhu,

Qiusheng Huang,

Anboyu Guo,

Xiaohui Zhong,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Zijian and Huang, Qiusheng and Guo, Anboyu and Zhong, Xiaohui and Li, Hao},
    title     = {{AviaSafe}: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33143--33152}
}
Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation: ZeBin Ji,

Yang Hu,

Xiuli Bi,

Bo Liu,

Bin Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
    author    = {Ji, ZeBin and Hu, Yang and Bi, Xiuli and Liu, Bo and Xiao, Bin},
    title     = {Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31316--31325}
}
SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method: Wentao Yang,

Fanzhen Kong,

Zejian Kang,

Xiangru Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Wentao and Kong, Fanzhen and Kang, Zejian and Huang, Xiangru},
    title     = {{SparseOIT}: Improving Order-Independent Transparency {3DGS} via Active Set Method},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41012--41021}
}
Guiding Token-Sparse Diffusion Models: Felix Krause,

Stefan Andreas Baumann,

Johannes Schusterbauer,

Olga Grebenkova,

Ming Gui,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krause_2026_CVPR,
    author    = {Krause, Felix and Baumann, Stefan Andreas and Schusterbauer, Johannes and Grebenkova, Olga and Gui, Ming and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
    title     = {Guiding Token-Sparse Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35789--35799}
}
ScenDi: 3D-to-2D Scene Diffusion Cascades for Urban Generation: Hanlei Guo,

Jiahao Shao,

Xinya Chen,

Xiyang Tan,

Sheng Miao,

Yujun Shen,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Hanlei and Shao, Jiahao and Chen, Xinya and Tan, Xiyang and Miao, Sheng and Shen, Yujun and Liao, Yiyi},
    title     = {{ScenDi}: {3D}-to-{2D} Scene Diffusion Cascades for Urban Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40291--40302}
}
Grounded Chain-of-Thought for Multimodal Large Language Models: Qiong Wu,

Xiangcong Yang,

Yiyi Zhou,

Chenxin Fang,

Baiyang Song,

Xiaoshuai Sun,

Rongrong Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Qiong and Yang, Xiangcong and Zhou, Yiyi and Fang, Chenxin and Song, Baiyang and Sun, Xiaoshuai and Ji, Rongrong},
    title     = {Grounded Chain-of-Thought for Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33577--33587}
}
EventHub: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors: Luca Bartolomei,

Fabio Tosi,

Matteo Poggi,

Stefano Mattoccia,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bartolomei_2026_CVPR,
    author    = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano and Gallego, Guillermo},
    title     = {{EventHub}: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37063--37074}
}
Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning: Yaozong Zheng,

Qihua Liang,

Bineng Zhong,

Shuimu Zeng,

Yuanliang Xue,

Ning Li,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Yaozong and Liang, Qihua and Zhong, Bineng and Zeng, Shuimu and Xue, Yuanliang and Li, Ning and Song, Shuxiang},
    title     = {Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35197--35206}
}
UniGame: Turning a Unified Multimodal Model Into Its Own Adversary: Zhaolong Su,

Wang Lu,

Hao Chen,

Sharon Li,

Jindong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Zhaolong and Lu, Wang and Chen, Hao and Li, Sharon and Wang, Jindong},
    title     = {{UniGame}: Turning a Unified Multimodal Model Into Its Own Adversary},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37632--37641}
}
GS^2: Graph-based Spatial Distribution Optimization for Compact 3D Gaussian Splatting: Xianben Yang,

Tao Wang,

Yuxuan Li,

Yi Jin,

Haibin Ling; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Xianben and Wang, Tao and Li, Yuxuan and Jin, Yi and Ling, Haibin},
    title     = {{GS{\textasciicircum}2}: Graph-based Spatial Distribution Optimization for Compact {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33259--33268}
}
Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor: Yapeng Meng,

Lin Yang,

Yuguo Chen,

Xiangru Chen,

Taoyi Wang,

Lijian Wang,

Zheyu Yang,

Yihan Lin,

Rong Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
    author    = {Meng, Yapeng and Yang, Lin and Chen, Yuguo and Chen, Xiangru and Wang, Taoyi and Wang, Lijian and Yang, Zheyu and Lin, Yihan and Zhao, Rong},
    title     = {Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37496--37506}
}
Motus: A Unified Latent Action World Model: Hongzhe Bi,

Hengkai Tan,

Shenghao Xie,

Zeyuan Wang,

Shuhe Huang,

Haitian Liu,

Ruowen Zhao,

Yao Feng,

Chendong Xiang,

Yinze Rong,

Hongyan Zhao,

Hanyu Liu,

Zhizhong Su,

Lei Ma,

Hang Su,

Jun Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
    author    = {Bi, Hongzhe and Tan, Hengkai and Xie, Shenghao and Wang, Zeyuan and Huang, Shuhe and Liu, Haitian and Zhao, Ruowen and Feng, Yao and Xiang, Chendong and Rong, Yinze and Zhao, Hongyan and Liu, Hanyu and Su, Zhizhong and Ma, Lei and Su, Hang and Zhu, Jun},
    title     = {Motus: A Unified Latent Action World Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35101--35113}
}
Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction: Zhihao Li,

Shengwei Dong,

Chuang Yi,

Junxuan Gao,

Zhilu Lai,

Zhiqiang Liu,

Wei Wang,

Guangtao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Zhihao and Dong, Shengwei and Yi, Chuang and Gao, Junxuan and Lai, Zhilu and Liu, Zhiqiang and Wang, Wei and Zhang, Guangtao},
    title     = {Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30574--30583}
}
GenColorBench: A Color Evaluation Benchmark for Text-to-Image Generation: Muhammad Atif Butt,

Alexandra Gomez-Villa,

Tao Wu,

Javier Vazquez-Corral,

Joost Van De Weijer,

Kai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Butt_2026_CVPR,
    author    = {Butt, Muhammad Atif and Gomez-Villa, Alexandra and Wu, Tao and Vazquez-Corral, Javier and Van De Weijer, Joost and Wang, Kai},
    title     = {{GenColorBench}: A Color Evaluation Benchmark for Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36638--36648}
}
FedRAC: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning: Zihui Wang,

Yuhang Fu,

Mengmeng Du,

Zhimin Yuan,

Yachen Liu,

Weisheng Liao,

Kaiyu Wang,

Zheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zihui and Fu, Yuhang and Du, Mengmeng and Yuan, Zhimin and Liu, Yachen and Liao, Weisheng and Wang, Kaiyu and Wang, Zheng},
    title     = {{FedRAC}: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31802--31811}
}
SpeeDiff: Scalable Pixel-Anchored End-to-End Latent Diffusion Model: Bingliang Zhang,

Wenda Chu,

Yizhuo Li,

Linjie Yang,

Yisong Yue,

Katherine Bouman,

Yang Song,

Qiushan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Bingliang and Chu, Wenda and Li, Yizhuo and Yang, Linjie and Yue, Yisong and Bouman, Katherine and Song, Yang and Guo, Qiushan},
    title     = {{SpeeDiff}: Scalable Pixel-Anchored End-to-End Latent Diffusion Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35893--35903}
}
PoseMaster: A Unified 3D Native Framework for Stylized Pose Generation: Hongyu Yan,

Kunming Luo,

Weiyu Li,

Kaiyi Zhang,

Yixun Liang,

Jingwei Huang,

Chunchao Guo,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Hongyu and Luo, Kunming and Li, Weiyu and Zhang, Kaiyi and Liang, Yixun and Huang, Jingwei and Guo, Chunchao and Tan, Ping},
    title     = {{PoseMaster}: A Unified {3D} Native Framework for Stylized Pose Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34292--34302}
}
Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning: Songyuan Yang,

Weijiang Yu,

Jilin Ma,

Ziyu Liu,

Guijian Tang,

Wenjing Yang,

Huibin Tan,

Nong Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Songyuan and Yu, Weijiang and Ma, Jilin and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong},
    title     = {Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33794--33804}
}
CineBrain: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation: Jianxiong Gao,

Yichang Liu,

Baofeng Yang,

Jianfeng Feng,

Yanwei Fu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Jianxiong and Liu, Yichang and Yang, Baofeng and Feng, Jianfeng and Fu, Yanwei},
    title     = {{CineBrain}: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36224--36234}
}
Latent Implicit Visual Reasoning: Kelvin Li,

Chuyi Shang,

Leonid Karlinsky,

Rogerio Feris,

Trevor Darrell,

Roei Herzig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Kelvin and Shang, Chuyi and Karlinsky, Leonid and Feris, Rogerio and Darrell, Trevor and Herzig, Roei},
    title     = {Latent Implicit Visual Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33457--33466}
}
Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain: Chengyin Hu,

Xin Wang,

Rui Qiu,

Zhe Jia,

Yingying Zhao,

Kai Wang,

Xu Kang,

Yiwei Wei; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Chengyin and Wang, Xin and Qiu, Rui and Jia, Zhe and Zhao, Yingying and Wang, Kai and Kang, Xu and Wei, Yiwei},
    title     = {Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34981--34990}
}
Finding Distributed Object-Centric Properties in Self-Supervised Transformers: Samyak Rawlekar,

Amitabh Swain,

Yujun Cai,

Yiwei Wang,

Ming-Hsuan Yang,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rawlekar_2026_CVPR,
    author    = {Rawlekar, Samyak and Swain, Amitabh and Cai, Yujun and Wang, Yiwei and Yang, Ming-Hsuan and Ahuja, Narendra},
    title     = {Finding Distributed Object-Centric Properties in Self-Supervised Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31326--31335}
}
KLIP: Localized Distribution Shift Detection via KL-Divergence with Diffusion Priors in Inverse Problems: Alireza Kheirandish,

Jihoon Hong,

Sara Fridovich-Keil; [pdf] [supp]
[bibtex]
@InProceedings{Kheirandish_2026_CVPR,
  author    = {Kheirandish, Alireza and Hong, Jihoon and Fridovich-Keil, Sara},
  title     = {{KLIP}: Localized Distribution Shift Detection via {KL}-Divergence with Diffusion Priors in Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30823--30832},
}
Scaling Up AI-Generated Image Detection with Generator-Aware Prototypes: Ziheng Qin,

Yuheng Ji,

Renshuai Tao,

Yuxuan Tian,

Yuyang Liu,

Yipu Wang,

Xiaolong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Ziheng and Ji, Yuheng and Tao, Renshuai and Tian, Yuxuan and Liu, Yuyang and Wang, Yipu and Zheng, Xiaolong},
  title     = {Scaling Up {AI}-Generated Image Detection with Generator-Aware Prototypes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43008--43017},
}
ViKey: Enhancing Temporal Understanding in Videos via Visual Prompting: Yeonkyung Lee,

Dayun Ju,

Youngmin Kim,

Seil Kang,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yeonkyung and Ju, Dayun and Kim, Youngmin and Kang, Seil and Hwang, Seong Jae},
  title     = {{ViKey}: Enhancing Temporal Understanding in Videos via Visual Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38880--38890},
}
Extending Embodied Question Answering from Perception to Decision: Xicheng Gong,

Qiwei Li,

Peiran Xu,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Xicheng and Li, Qiwei and Xu, Peiran and Mu, Yadong},
  title     = {Extending Embodied Question Answering from Perception to Decision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29567--29577},
}
Gaze Target Estimation Anywhere with Concepts: Xu Cao,

Houze Yang,

Vipin Gunda,

Zhongyi Zhou,

Tianyu Xu,

Adarsh Kowdle,

Inki Kim,

James M. Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Xu and Yang, Houze and Gunda, Vipin and Zhou, Zhongyi and Xu, Tianyu and Kowdle, Adarsh and Kim, Inki and Rehg, James M.},
  title     = {Gaze Target Estimation Anywhere with Concepts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31304--31315},
}
Tell Model Where to Look: Mitigating Hallucinations in MLLMs by Vision-Guided Attention: Jianfei Zhao,

Feng Zhang,

Xin Sun,

Chong Feng,

Zhixing Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jianfei and Zhang, Feng and Sun, Xin and Feng, Chong and Tan, Zhixing},
  title     = {Tell Model Where to Look: Mitigating Hallucinations in {MLLMs} by Vision-Guided Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32582--32591},
}
Pico-Banana-400K: A Large-Scale Dataset for Text-Guided Image Editing: Yusu Qian,

Eli Bocek-Rivele,

Liangchen Song,

Jialing Tong,

Yinfei Yang,

Jiasen Lu,

Wenze Hu,

Zhe Gan; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Yusu and Bocek-Rivele, Eli and Song, Liangchen and Tong, Jialing and Yang, Yinfei and Lu, Jiasen and Hu, Wenze and Gan, Zhe},
  title     = {{Pico-Banana-400K}: A Large-Scale Dataset for Text-Guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37226--37235},
}
D2Cache: Second-Order Delta Caching for Higher Video Diffusion Acceleration: Enhuai Liu,

Yunke Wang,

Changming Sun,

Chang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Enhuai and Wang, Yunke and Sun, Changming and Xu, Chang},
  title     = {{D2Cache}: Second-Order Delta Caching for Higher Video Diffusion Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43589--43599},
}
Deformation-based In-Context Learning for Point Cloud Understanding: Chengxing Lin,

Jinhong Deng,

Yinjie Lei,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Chengxing and Deng, Jinhong and Lei, Yinjie and Li, Wen},
  title     = {Deformation-based In-Context Learning for Point Cloud Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39001--39010},
}
Differentiable Laplacian Matrix Guided Superpixel Segmentation: Jeremy Juybari,

Josh Hamilton,

Shuvra Das,

Chaofan Chen,

Andre Khalil,

Yifeng Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Juybari_2026_CVPR,
  author    = {Juybari, Jeremy and Hamilton, Josh and Das, Shuvra and Chen, Chaofan and Khalil, Andre and Zhu, Yifeng},
  title     = {Differentiable {Laplacian} Matrix Guided Superpixel Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36168--36178},
}
Content-Aware Dynamic Patchification for Efficient Video Diffusion: Sheng Li,

Connelly Barnes,

Mamshad Nayeem Rizve,

Hongwu Peng,

Zhengang Li,

Ohi Dibua,

Alireza Ganjdanesh,

Xulong Tang,

Yan Kang,

Yifan Gong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Sheng and Barnes, Connelly and Rizve, Mamshad Nayeem and Peng, Hongwu and Li, Zhengang and Dibua, Ohi and Ganjdanesh, Alireza and Tang, Xulong and Kang, Yan and Gong, Yifan},
  title     = {Content-Aware Dynamic Patchification for Efficient Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35936--35945},
}
Object-WIPER: Training-Free Object and Associated Effect Removal in Videos: Saksham Singh Kushwaha,

Sayan Nag,

Yapeng Tian,

Kuldeep Kulkarni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kushwaha_2026_CVPR,
  author    = {Kushwaha, Saksham Singh and Nag, Sayan and Tian, Yapeng and Kulkarni, Kuldeep},
  title     = {{Object-WIPER}: Training-Free Object and Associated Effect Removal in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38071--38080},
}
EarlyTom: Early Token Compression Completes Fast Video Understanding: Hesong Wang,

Xin Jin,

Lu Lu,

Chenhaowen Li,

Jian Chen,

Qiang Liu,

Huan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hesong and Jin, Xin and Lu, Lu and Li, Chenhaowen and Chen, Jian and Liu, Qiang and Wang, Huan},
  title     = {{EarlyTom}: Early Token Compression Completes Fast Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40559--40568},
}
ReaGEN: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning: Ruiqing Tian,

Mohan Sai Singamsetti,

Di Niu,

Bahador Rashidi; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Ruiqing and Singamsetti, Mohan Sai and Niu, Di and Rashidi, Bahador},
  title     = {{ReaGEN}: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33521--33530},
}
High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling: Feng Ye,

Kai Zhang,

Li Zhang,

Chuanmin Jia; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Feng and Zhang, Kai and Zhang, Li and Jia, Chuanmin},
  title     = {High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33652--33661},
}
WorldStereo: Bridging Camera-Guided Video Generation and Scene Reconstruction via 3D Geometric Memories: Yisu Zhang,

Chenjie Cao,

Tengfei Wang,

Xuhui Zuo,

Junta Wu,

Jianke Zhu,

Chunchao Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yisu and Cao, Chenjie and Wang, Tengfei and Zuo, Xuhui and Wu, Junta and Zhu, Jianke and Guo, Chunchao},
  title     = {{WorldStereo}: Bridging Camera-Guided Video Generation and Scene Reconstruction via {3D} Geometric Memories},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40327--40339},
}
SALMUBench: A Benchmark for Sensitive Association-Level Multimodal Unlearning: Cai Selvas-Sala,

Lei Kang,

Lluis Gomez; [pdf] [supp]
[bibtex]
@InProceedings{Selvas-Sala_2026_CVPR,
  author    = {Selvas-Sala, Cai and Kang, Lei and Gomez, Lluis},
  title     = {{SALMUBench}: A Benchmark for Sensitive Association-Level Multimodal Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39351--39360},
}
MusicInfuser: Making Video Diffusion Listen and Dance: Susung Hong,

Ira Kemelmacher-Shlizerman,

Brian Curless,

Steven M. Seitz; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Susung and Kemelmacher-Shlizerman, Ira and Curless, Brian and Seitz, Steven M.},
  title     = {{MusicInfuser}: Making Video Diffusion Listen and Dance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43751--43761},
}
Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration: Sicheng Mo,

Thao Nguyen,

Richard Zhang,

Nick Kolkin,

Siddharth Srinivasan Iyer,

Eli Shechtman,

Krishna Kumar Singh,

Yong Jae Lee,

Bolei Zhou,

Yuheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2026_CVPR,
  author    = {Mo, Sicheng and Nguyen, Thao and Zhang, Richard and Kolkin, Nick and Iyer, Siddharth Srinivasan and Shechtman, Eli and Singh, Krishna Kumar and Lee, Yong Jae and Zhou, Bolei and Li, Yuheng},
  title     = {Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35702--35712},
}
MoRe: Motion-aware Feed-forward 4D Reconstruction Transformer: Juntong Fang,

Zequn Chen,

Weiqi Zhang,

Donglin Di,

Xuancheng Zhang,

Chengmin Yang,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Juntong and Chen, Zequn and Zhang, Weiqi and Di, Donglin and Zhang, Xuancheng and Yang, Chengmin and Liu, Yu-Shen},
  title     = {{MoRe}: Motion-aware Feed-forward {4D} Reconstruction Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28914--28924},
}
Simple-ViLMedSAM: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation: Chengcan Qian,

Dong Nie,

Geng Chen,

Daoqiang Zhang,

Xuyun Wen; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Chengcan and Nie, Dong and Chen, Geng and Zhang, Daoqiang and Wen, Xuyun},
  title     = {{Simple-ViLMedSAM}: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30042--30052},
}
TIPSv2: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment: Bingyi Cao,

Koert Chen,

Kevis-Kokitsi Maninis,

Kaifeng Chen,

Arjun Karpur,

Ye Xia,

Sahil Dua,

Tanmaya Dabral,

Guangxing Han,

Bohyung Han,

Joshua Ainslie,

Alex Bewley,

Mithun Jacob,

René Wagner,

Washington Ramos,

Krzysztof Choromanski,

Mojtaba Seyedhosseini,

Howard Zhou,

Andre Araujo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Bingyi and Chen, Koert and Maninis, Kevis-Kokitsi and Chen, Kaifeng and Karpur, Arjun and Xia, Ye and Dua, Sahil and Dabral, Tanmaya and Han, Guangxing and Han, Bohyung and Ainslie, Joshua and Bewley, Alex and Jacob, Mithun and Wagner, Ren{\'e} and Ramos, Washington and Choromanski, Krzysztof and Seyedhosseini, Mojtaba and Zhou, Howard and Araujo, Andre},
  title     = {{TIPSv2}: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29325--29335},
}
The LLM Bottleneck: Why Open-Source Vision LLMs Struggle with Hierarchical Visual Recognition: Yuwen Tan,

Yuan Qing,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Yuwen and Qing, Yuan and Gong, Boqing},
  title     = {The {LLM} Bottleneck: Why Open-Source Vision {LLMs} Struggle with Hierarchical Visual Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38532--38543},
}
Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation: Taehoon Kim,

Henry Gouk,

Timothy Hospedales; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Taehoon and Gouk, Henry and Hospedales, Timothy},
  title     = {Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43611--43620},
}
SPEAR-1: Scaling Beyond Robot Demonstrations via 3D Understanding: Nikolay Nikolov,

Giuliano Albanese,

Sombit Dey,

Aleksandar Yanev,

Luc Van Gool,

Jan-Nico Zaech,

Danda Pani Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nikolov_2026_CVPR,
  author    = {Nikolov, Nikolay and Albanese, Giuliano and Dey, Sombit and Yanev, Aleksandar and Van Gool, Luc and Zaech, Jan-Nico and Paudel, Danda Pani},
  title     = {{SPEAR-1}: Scaling Beyond Robot Demonstrations via {3D} Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35124--35134},
}
When Token Pruning is Worse than Random: Understanding Visual Token Information in VLLMs: Yahong Wang,

Juncheng Wu,

Zhangkai Ni,

Longzhen Yang,

Yihang Liu,

Chengmei Yang,

Ying Wen,

Lianghua He,

Xianfeng Tang,

Hui Liu,

Yuyin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yahong and Wu, Juncheng and Ni, Zhangkai and Yang, Longzhen and Liu, Yihang and Yang, Chengmei and Wen, Ying and He, Lianghua and Tang, Xianfeng and Liu, Hui and Zhou, Yuyin},
  title     = {When Token Pruning is Worse than Random: Understanding Visual Token Information in {VLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31910--31919},
}
Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features: Jingyi Xu,

Meisong Zheng,

Ying Chen,

Minglang Qiao,

Xin Deng,

Mai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jingyi and Zheng, Meisong and Chen, Ying and Qiao, Minglang and Deng, Xin and Xu, Mai},
  title     = {Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38248--38257},
}
TANGO: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise: Tengfei Ma,

Weiran Pan,

Wei Wei; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tengfei and Pan, Weiran and Wei, Wei},
  title     = {{TANGO}: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39186--39196},
}
Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity: Zhengyao Fang,

Zexi Jia,

Yijia Zhong,

Pengcheng Luo,

Jinchao Zhang,

Guangming Lu,

Jun Yu,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zhengyao and Jia, Zexi and Zhong, Yijia and Luo, Pengcheng and Zhang, Jinchao and Lu, Guangming and Yu, Jun and Pei, Wenjie},
  title     = {Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37258--37267},
}
EgoRoC: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment: Wei Feng,

Chi Zhang,

Nan Li,

Qian Zhang,

Qi Zhang,

Mingyan Li; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Wei and Zhang, Chi and Li, Nan and Zhang, Qian and Zhang, Qi and Li, Mingyan},
  title     = {{EgoRoC}: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34991--35001},
}
Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration: Qianqian Tang,

Jinchi Zhu,

Xiaolu Zhou,

Yongchao Xu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Qianqian and Zhu, Jinchi and Zhou, Xiaolu and Xu, Yongchao},
  title     = {Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34061--34071},
}
In Pursuit of Pixel Supervision for Visual Pre-training: Lihe Yang,

Shang-Wen Li,

Yang Li,

Xinjie Lei,

Dong Wang,

Abdelrahman Mohamed,

Saining Xie,

Hengshuang Zhao,

Kaiming He,

Hu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Lihe and Li, Shang-Wen and Li, Yang and Lei, Xinjie and Wang, Dong and Mohamed, Abdelrahman and Xie, Saining and Zhao, Hengshuang and He, Kaiming and Xu, Hu},
  title     = {In Pursuit of Pixel Supervision for Visual Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31974--31984},
}
Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors: Hakyeong Kim,

Ruicheng Wang,

Chengtang Yao,

Jiaolong Yang,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hakyeong and Wang, Ruicheng and Yao, Chengtang and Yang, Jiaolong and Kim, Min H.},
  title     = {Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36518--36528},
}
Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning: Zhenwu Shi,

Jingyu Gong,

Peiwei Wang,

Xingzan Wang,

Tianwen Qian,

Wenxi Li,

Yuan Fang,

Jiao Xie,

Lizhuang Ma,

Shaohui Lin; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhenwu and Gong, Jingyu and Wang, Peiwei and Wang, Xingzan and Qian, Tianwen and Li, Wenxi and Fang, Yuan and Xie, Jiao and Ma, Lizhuang and Lin, Shaohui},
  title     = {Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30595--30606},
}
FabricGen: Microstructure-Aware Woven Fabric Generation: Yingjie Tang,

Di Luo,

Zixiong Wang,

Xiaoli Ling,

Jian Yang,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yingjie and Luo, Di and Wang, Zixiong and Ling, Xiaoli and Yang, Jian and Wang, Beibei},
  title     = {{FabricGen}: Microstructure-Aware Woven Fabric Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34333--34342},
}
FedRE: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning: Yuan Yao,

Lixu Wang,

Jiaqi Wu,

Jin Song,

Simin Chen,

Zehua Wang,

Zijian Tian,

Wei Chen,

Huixia Li,

Xiaoxiao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Yuan and Wang, Lixu and Wu, Jiaqi and Song, Jin and Chen, Simin and Wang, Zehua and Tian, Zijian and Chen, Wei and Li, Huixia and Li, Xiaoxiao},
  title     = {{FedRE}: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39466--39475},
}
Scale Space Diffusion: Soumik Mukhopadhyay,

Prateksha Udhayanan,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mukhopadhyay_2026_CVPR,
  author    = {Mukhopadhyay, Soumik and Udhayanan, Prateksha and Shrivastava, Abhinav},
  title     = {Scale Space Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35851--35860},
}
VisRes Bench: On Evaluating the Visual Reasoning Capabilities of VLMs: Brigitta Malagurski Törtei,

Yasser Dahou,

Ngoc Dung Huynh,

Wamiq Reyaz Para,

Phúc H. Lê Khac,

Ankit Singh,

Sofian Chaybouti,

Sanath Narayan; [pdf] [supp]
[bibtex]
@InProceedings{Tortei_2026_CVPR,
  author    = {T{\"o}rtei, Brigitta Malagurski and Dahou, Yasser and Huynh, Ngoc Dung and Para, Wamiq Reyaz and Khac, Ph{\'u}c H. L{\^e} and Singh, Ankit and Chaybouti, Sofian and Narayan, Sanath},
  title     = {{VisRes Bench}: On Evaluating the Visual Reasoning Capabilities of {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33185--33195},
}
Learned Image Compression via Sparse Attention and Adaptive Frequency: Huidong Ma,

Xinyan Shi,

Hui Sun,

Xiaofei Yue,

Xiaoguang Liu,

Gang Wang,

Wentong Cai; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Huidong and Shi, Xinyan and Sun, Hui and Yue, Xiaofei and Liu, Xiaoguang and Wang, Gang and Cai, Wentong},
  title     = {Learned Image Compression via Sparse Attention and Adaptive Frequency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41278--41287},
}
FedAdamom: Adaptive Momentum for Improved Generalization in Federated Optimization: Wenjie Hou,

Tianxiang Chen,

Feng Wang,

Tiantong Wu,

Zhiming Zheng,

Shaoting Tang,

Wei Yang Bryan Lim; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Wenjie and Chen, Tianxiang and Wang, Feng and Wu, Tiantong and Zheng, Zhiming and Tang, Shaoting and Lim, Wei Yang Bryan},
  title     = {{FedAdamom}: Adaptive Momentum for Improved Generalization in Federated Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36354--36364},
}
SEATrack: Simple, Efficient, and Adaptive Multimodal Tracker: Junbin Su,

Ziteng Xue,

Shihui Zhang,

Kun Chen,

Weiming Hu,

Zhipeng Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Junbin and Xue, Ziteng and Zhang, Shihui and Chen, Kun and Hu, Weiming and Zhang, Zhipeng},
  title     = {{SEATrack}: Simple, Efficient, and Adaptive Multimodal Tracker},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28679--28689},
}
Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation: Kai Zhu,

Li Chen,

Jun Cheng; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kai and Chen, Li and Cheng, Jun},
  title     = {Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36300--36310},
}
Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks: Linhua Cong,

Bingrui Sima,

Kun He; [pdf] [supp]
[bibtex]
@InProceedings{Cong_2026_CVPR,
  author    = {Cong, Linhua and Sima, Bingrui and He, Kun},
  title     = {Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36926--36935},
}
BEV-CAR: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization: Yixin Xiong,

Ke Wang,

Tongtong Cheng,

Chunhui Liu,

Kai Liu; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Yixin and Wang, Ke and Cheng, Tongtong and Liu, Chunhui and Liu, Kai},
  title     = {{BEV-CAR}: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39869--39878},
}
BiomedCCPL: Causal Conditional Prompt Learning for Biomedical Vision-Language Models: Xueliang Cui,

Juncai Zhang,

Jiacheng Hou,

Dan Lu,

Hao Zhang,

Ruxin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Xueliang and Zhang, Juncai and Hou, Jiacheng and Lu, Dan and Zhang, Hao and Wang, Ruxin},
  title     = {{BiomedCCPL}: Causal Conditional Prompt Learning for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40812--40821},
}
RaUF: Learning the Spatial Uncertainty Field of Radar: Shengpeng Wang,

Kuangyu Wang,

Wei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shengpeng and Wang, Kuangyu and Wang, Wei},
  title     = {{RaUF}: Learning the Spatial Uncertainty Field of Radar},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42474--42483},
}
Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark: Yifei Deng,

Chenglong Li,

Yuyang Zhang,

Guyue Hu,

Jin Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Yifei and Li, Chenglong and Zhang, Yuyang and Hu, Guyue and Tang, Jin},
  title     = {Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38732--38741},
}
SkillSight: Efficient First-Person Skill Assessment with Gaze: Chi Hsuan Wu,

Ashutosh Kumar,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chi Hsuan and Kumar, Ashutosh and Grauman, Kristen},
  title     = {{SkillSight}: Efficient First-Person Skill Assessment with Gaze},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38891--38903},
}
Molmo2: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding: Christopher Clark,

Jieyu Zhang,

Zixian Ma,

Jae Sung Park,

Rohun Tripathi,

Sangho Lee,

Mohammadreza Salehi,

Jason Ren,

Chris Dongjoo Kim,

Yinuo Yang,

Vincent Shao,

Yue Yang,

Weikai Huang,

Ziqi Gao,

Taira Anderson,

Jianrui Zhang,

Jitesh Jain,

George Stoica,

Ali Farhadi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Clark_2026_CVPR,
  author    = {Clark, Christopher and Zhang, Jieyu and Ma, Zixian and Park, Jae Sung and Tripathi, Rohun and Lee, Sangho and Salehi, Mohammadreza and Ren, Jason and Kim, Chris Dongjoo and Yang, Yinuo and Shao, Vincent and Yang, Yue and Huang, Weikai and Gao, Ziqi and Anderson, Taira and Zhang, Jianrui and Jain, Jitesh and Stoica, George and Farhadi, Ali and Krishna, Ranjay},
  title     = {{Molmo2}: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28652--28668},
}
MonoVLM: Monocular 3D Visual Grounding with Vision Language Models: Huaizhi Qu,

Hossein Nourkhiz Mahjoub,

Vaishnav Tadiparthi,

Kwonjoon Lee,

Tianlong Chen; [pdf]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Huaizhi and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Chen, Tianlong},
  title     = {{MonoVLM}: Monocular {3D} Visual Grounding with Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30986--30996},
}
Single-Round Scalable Analytic Federated Learning: Alan T. L. Bacellar,

Mustafa Munir,

Felipe M. G. França,

Priscila M. V. Lima,

Radu Marculescu,

Lizy K. John; [pdf]
[bibtex]
@InProceedings{Bacellar_2026_CVPR,
  author    = {Bacellar, Alan T. L. and Munir, Mustafa and Fran{\c{c}}a, Felipe M. G. and Lima, Priscila M. V. and Marculescu, Radu and John, Lizy K.},
  title     = {Single-Round Scalable Analytic Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39445--39454},
}
HierEdit: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing: Yuyao Zhang,

Alexander Huang-Menders,

Yu-Wing Tai; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuyao and Huang-Menders, Alexander and Tai, Yu-Wing},
  title     = {{HierEdit}: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43546--43557},
}
BluRef: Unsupervised Image Deblurring with Dense-Matching References: Bang-Dang Pham,

Anh Tran,

Cuong Pham,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2026_CVPR,
  author    = {Pham, Bang-Dang and Tran, Anh and Pham, Cuong and Hoai, Minh},
  title     = {{BluRef}: Unsupervised Image Deblurring with Dense-Matching References},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37445--37454},
}
Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset: Qingyan Bai,

Qiuyu Wang,

Hao Ouyang,

Yue Yu,

Hanlin Wang,

Wen Wang,

Ka Leong Cheng,

Shuailei Ma,

Yanhong Zeng,

Zichen Liu,

Yinghao Xu,

Yujun Shen,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Qingyan and Wang, Qiuyu and Ouyang, Hao and Yu, Yue and Wang, Hanlin and Wang, Wen and Cheng, Ka Leong and Ma, Shuailei and Zeng, Yanhong and Liu, Zichen and Xu, Yinghao and Shen, Yujun and Chen, Qifeng},
  title     = {Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37971--37981},
}
Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining: Weijun Zhuang,

Yuqing Huang,

Weikang Meng,

Xin Li,

Ming Liu,

Xiaopeng Hong,

Yaowei Wang,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Weijun and Huang, Yuqing and Meng, Weikang and Li, Xin and Liu, Ming and Hong, Xiaopeng and Wang, Yaowei and Zuo, Wangmeng}, title = {Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39197-39207} }
FG-Portrait: 3D Flow Guided Editable Portrait Animation: Yating Xu,

Yunqi Miao,

Evangelos Ververas,

Jiankang Deng,

Jifei Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Yating and Miao, Yunqi and Ververas, Evangelos and Deng, Jiankang and Song, Jifei}, title = {FG-Portrait: 3D Flow Guided Editable Portrait Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32947-32956} }
Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning: Seung hee Choi,

MinJu Jeon,

Hyunwoo Oh,

Jihwan Lee,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR, author = {Choi, Seung hee and Jeon, MinJu and Oh, Hyunwoo and Lee, Jihwan and Kim, Dong-Jin}, title = {Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32808-32817} }
Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled PET: Huabin Wang,

Xinyu Chen,

Yuan Zhou,

Fei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Huabin and Chen, Xinyu and Zhou, Yuan and Liu, Fei}, title = {Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled PET}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32037-32046} }
Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production: Yiheng Yu,

Sheng Liu,

Yuan Feng,

Zhelun Jin,

Yining Jiang,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Yiheng and Liu, Sheng and Feng, Yuan and Jin, Zhelun and Jiang, Yining and Xu, Min}, title = {Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35915-35925} }
MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation: Yuqi Lin,

Hao Zhang,

Wenqi Shao,

Shiqu Liu,

Zhihong Gu,

Wenxiao Wang,

Xiaofei He,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Yuqi and Zhang, Hao and Shao, Wenqi and Liu, Shiqu and Gu, Zhihong and Wang, Wenxiao and He, Xiaofei and Zhang, Kaipeng}, title = {MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42126-42136} }
Decoupling Vision and Language: Codebook Anchored Visual Adaptation: Jason Wu,

Tianchen Zhao,

Chang Liu,

Jiarui Cai,

Zheng Zhang,

Zhuowei Li,

Aaditya Singh,

Xiang Xu,

Mani Srivastava,

Jonathan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Jason and Zhao, Tianchen and Liu, Chang and Cai, Jiarui and Zhang, Zheng and Li, Zhuowei and Singh, Aaditya and Xu, Xiang and Srivastava, Mani and Wu, Jonathan}, title = {Decoupling Vision and Language: Codebook Anchored Visual Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36957-36967} }
Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models: Zizhi Chen,

Yizhen Gao,

Minghao Han,

Yizhou Liu,

Zhaoyu Chen,

Dingkang Yang,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Zizhi and Gao, Yizhen and Han, Minghao and Liu, Yizhou and Chen, Zhaoyu and Yang, Dingkang and Zhang, Lihua}, title = {Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32309-32321} }
Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures: Xinkuan Qiu,

Meina Kan,

Zhenliang He,

Yongbin Zhou,

Shiguang Shan; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xinkuan and Kan, Meina and He, Zhenliang and Zhou, Yongbin and Shan, Shiguang}, title = {Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40845-40854} }
Seeing Conversations: Communication Context Identification in Egocentric Video: Tobias Dorszewski,

Jens Hjortkjær; [pdf]
[bibtex]
@InProceedings{Dorszewski_2026_CVPR, author = {Dorszewski, Tobias and Hjortkj{\ae}r, Jens}, title = {Seeing Conversations: Communication Context Identification in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38816-38825} }
SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras: Weihong Pan,

Xiaoyu Zhang,

Zhuang Zhang,

Zhichao Ye,

Nan Wang,

Haomin Liu,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Weihong and Zhang, Xiaoyu and Zhang, Zhuang and Ye, Zhichao and Wang, Nan and Liu, Haomin and Zhang, Guofeng}, title = {SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33237-33247} }
Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection: Yongchao Xu,

Jiawei Liu,

Junfeng Wang,

Sen Tao,

Na Jiang,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Yongchao and Liu, Jiawei and Wang, Junfeng and Tao, Sen and Jiang, Na and Zha, Zheng-Jun}, title = {Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34673-34682} }
FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle: Mario Markov,

Stefan Ailuro,

Luc Van Gool,

Konrad Schindler,

Danda Pani Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Markov_2026_CVPR, author = {Markov, Mario and Ailuro, Stefan and Van Gool, Luc and Schindler, Konrad and Paudel, Danda Pani}, title = {FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34795-34805} }
Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning: Hanjing Lin,

Jiahua Rao,

Youhan Sun,

Jiancong Xie,

Yuedong Yang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Hanjing and Rao, Jiahua and Sun, Youhan and Xie, Jiancong and Yang, Yuedong}, title = {Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41804-41814} }
FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts: Xin Xu,

Weilong Li,

Wei Liu,

Wenke Huang,

Zhixi Yu,

Bin Yang,

Xiaoying Liao,

Kui Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Xin and Li, Weilong and Liu, Wei and Huang, Wenke and Yu, Zhixi and Yang, Bin and Liao, Xiaoying and Jiang, Kui}, title = {FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40448-40457} }
ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation: Zichen Geng,

Zeeshan Hayder,

Wei Liu,

Hesheng Wang,

Ajmal Saeed Mian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2026_CVPR, author = {Geng, Zichen and Hayder, Zeeshan and Liu, Wei and Wang, Hesheng and Mian, Ajmal Saeed}, title = {ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30718-30728} }
FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery: Xiaokun Zhang,

Yi Yang,

Ziqi Ye,

Baiyun Baiyun,

Xiaorong Guo,

Qingchen Fang,

Ruyi Zhang,

Xinpeng Zhou,

Haipeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaokun and Yang, Yi and Ye, Ziqi and Baiyun, Baiyun and Guo, Xiaorong and Fang, Qingchen and Zhang, Ruyi and Zhou, Xinpeng and Wang, Haipeng}, title = {FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42158-42168} }
IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness: Ziye Geng,

Guang Yang,

Yihang Chen,

Changqing Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2026_CVPR, author = {Geng, Ziye and Yang, Guang and Chen, Yihang and Luo, Changqing}, title = {IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39383-39392} }
CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods: Qinqian Lei,

Bo Wang,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR, author = {Lei, Qinqian and Wang, Bo and Tan, Robby T.}, title = {CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38520-38531} }
Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model: Mohaiminul Al Nahian,

Abeer Matar Almalky,

Sabbir Ahmed,

Abdullah Al Arafat,

Mamshad Nayeem Rizve,

Adnan Siraj Rakin; [pdf] [supp]
[bibtex]
@InProceedings{Al_Nahian_2026_CVPR, author = {Al Nahian, Mohaiminul and Almalky, Abeer Matar and Ahmed, Sabbir and Al Arafat, Abdullah and Rizve, Mamshad Nayeem and Rakin, Adnan Siraj}, title = {Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34889-34899} }
Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild: Hao Luo,

Ye Wang,

Wanpeng Zhang,

Haoqi Yuan,

Yicheng Feng,

Haiweng Xu,

Sipeng Zheng,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Hao and Wang, Ye and Zhang, Wanpeng and Yuan, Haoqi and Feng, Yicheng and Xu, Haiweng and Zheng, Sipeng and Lu, Zongqing}, title = {Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35047-35058} }
SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens: Xiaoyan Zhang,

Zechen Bai,

Haofan Wang,

Yiren Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaoyan and Bai, Zechen and Wang, Haofan and Song, Yiren}, title = {SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38165-38175} }
TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization: Wonhyeok Choi,

Kyumin Hwang,

Jihun Park,

Kyoungmin Lee,

Seunghun Lee,

Jaeyeul Kim,

Minwoo Choi,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR, author = {Choi, Wonhyeok and Hwang, Kyumin and Park, Jihun and Lee, Kyoungmin and Lee, Seunghun and Kim, Jaeyeul and Choi, Minwoo and Im, Sunghoon}, title = {TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36871-36880} }
From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection: Yuwen Pan,

Yuan Wang,

Shaohui Li,

Zhi Li,

Yu Liu,

You He; [pdf]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Yuwen and Wang, Yuan and Li, Shaohui and Li, Zhi and Liu, Yu and He, You}, title = {From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35628-35637} }
SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation: Qianpeng Chong,

Wenyi Zeng,

Xiuxuan Shen,

Jiajie Li,

Qian Yin,

Xin Zheng; [pdf]
[bibtex]
@InProceedings{Chong_2026_CVPR, author = {Chong, Qianpeng and Zeng, Wenyi and Shen, Xiuxuan and Li, Jiajie and Yin, Qian and Zheng, Xin}, title = {SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42104-42114} }
GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency: Yin Wang,

Hao Lu,

Zixuan Wang,

Zhen Qin,

Li Kuang,

Mengchu Zhou,

Shuiguang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yin and Lu, Hao and Wang, Zixuan and Qin, Zhen and Kuang, Li and Zhou, Mengchu and Deng, Shuiguang}, title = {GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31985-31994} }
Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling: Shikun Zhang,

Yong Li,

Yiqun Wang,

Qiuhong Ke,

Cunjian Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shikun and Li, Yong and Wang, Yiqun and Ke, Qiuhong and Chen, Cunjian}, title = {Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40130-40139} }
Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation: Tianming Liang,

Haichao Jiang,

Yuting Yang,

Chaolei Tan,

Shuai Li,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Tianming and Jiang, Haichao and Yang, Yuting and Tan, Chaolei and Li, Shuai and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39497-39507} }
Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding: Ziyao He,

Yingjie Liu,

Zhang Yangrui,

Mingsong Chen,

Xuan Tang,

Xian Wei; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Ziyao and Liu, Yingjie and Yangrui, Zhang and Chen, Mingsong and Tang, Xuan and Wei, Xian}, title = {Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30997-31007} }
Personalized Image Descriptions from Attention Sequences: Ruoyu Xue,

Hieu Le,

Jingyi Xu,

Sounak Mondal,

Abe Leite,

Gregory Zelinsky,

Minh Hoai,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR, author = {Xue, Ruoyu and Le, Hieu and Xu, Jingyi and Mondal, Sounak and Leite, Abe and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris}, title = {Personalized Image Descriptions from Attention Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40042-40052} }
MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras: Yiqian Chang,

Qinghong Ye,

Haoran Xu,

Jianing Li,

Dongyang Ma,

Xuan Wang,

Wei Zhang,

Yonghong Tian,

Peixi Peng; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR, author = {Chang, Yiqian and Ye, Qinghong and Xu, Haoran and Li, Jianing and Ma, Dongyang and Wang, Xuan and Zhang, Wei and Tian, Yonghong and Peng, Peixi}, title = {MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37032-37042} }
Experience Transfer for Multimodal LLM Agents in Minecraft Game: Chenghao Li,

Jun Liu,

Songbo Zhang,

Huadong Jian,

Hao Ni,

Lik-Hang Lee,

Sung-Ho Bae,

Guoqing Wang,

Yang Yang,

Chaoning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Chenghao and Liu, Jun and Zhang, Songbo and Jian, Huadong and Ni, Hao and Lee, Lik-Hang and Bae, Sung-Ho and Wang, Guoqing and Yang, Yang and Zhang, Chaoning}, title = {Experience Transfer for Multimodal LLM Agents in Minecraft Game}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37143-37153} }
HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT: Yongsung Kim,

Wooseok Song,

Jaihyun Lew,

Hun Hwangbo,

Jaehoon Lee,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Yongsung and Song, Wooseok and Lew, Jaihyun and Hwangbo, Hun and Lee, Jaehoon and Yoon, Sungroh}, title = {HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36509-36517} }
Towards Dynamic Modality Alignment in Multimodal Continual Learning: Jiayao Tan,

Fan Lyu,

Tianle Liu,

Fuyuan Hu,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR, author = {Tan, Jiayao and Lyu, Fan and Liu, Tianle and Hu, Fuyuan and Feng, Wei}, title = {Towards Dynamic Modality Alignment in Multimodal Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39911-39921} }
SonoWorld: From One Image to a 3D Audio-Visual Scene: Derong Jin,

Xiyi Chen,

Ming C. Lin,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR, author = {Jin, Derong and Chen, Xiyi and Lin, Ming C. and Gao, Ruohan}, title = {SonoWorld: From One Image to a 3D Audio-Visual Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30194-30204} }
AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance: Tianling Xu,

Shengzhe Gan,

Leslie Gu,

Yuelei Li,

Fangneng Zhan,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Tianling and Gan, Shengzhe and Gu, Leslie and Li, Yuelei and Zhan, Fangneng and Pfister, Hanspeter}, title = {AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37133-37142} }
Detect Any AI-Counterfeited Text Image: Chenfan Qu,

Yiwu Zhong,

Xuekang Zhu,

Junchi Li,

Changjiang Jiang,

Jian Liu,

Lianwen Jin; [pdf] [supp]
[bibtex]
@InProceedings{Qu_2026_CVPR, author = {Qu, Chenfan and Zhong, Yiwu and Zhu, Xuekang and Li, Junchi and Jiang, Changjiang and Liu, Jian and Jin, Lianwen}, title = {Detect Any AI-Counterfeited Text Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35437-35450} }
ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction: Aoyu Liu,

Zhen Liu,

Ziyi Wang,

Dian Chen,

Bing Zeng,

Shuaicheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Aoyu and Liu, Zhen and Wang, Ziyi and Chen, Dian and Zeng, Bing and Liu, Shuaicheng}, title = {ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29909-29918} }
DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution: Shuhao Han,

Wenjie Liao,

Hayden Vance,

Hang Dong,

Rui Zhang,

Chun-Le Guo,

Chongyi Li; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Shuhao and Liao, Wenjie and Vance, Hayden and Dong, Hang and Zhang, Rui and Guo, Chun-Le and Li, Chongyi}, title = {DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38176-38186} }
Taming Generative Diffusion Model for Task-Oriented Infrared Imaging: Tengyu Ma,

Zhilong Dai,

Yubo Diao,

Guanming An,

Long Ma,

Jinyuan Liu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Tengyu and Dai, Zhilong and Diao, Yubo and An, Guanming and Ma, Long and Liu, Jinyuan and Liu, Risheng}, title = {Taming Generative Diffusion Model for Task-Oriented Infrared Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30843-30853} }
Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models: Zhirong Shen,

Rui Huang,

Jiacheng Liu,

Chang Zou,

Peiliang Cai,

Shikang Zheng,

Zhengyi Shi,

Liang Feng,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR, author = {Shen, Zhirong and Huang, Rui and Liu, Jiacheng and Zou, Chang and Cai, Peiliang and Zheng, Shikang and Shi, Zhengyi and Feng, Liang and Zhang, Linfeng}, title = {Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30792-30801} }
Diversity over Uniformity: Rethinking Representation in Generated Image Detection: Qinghui He,

Haifeng Zhang,

Qiao Qin,

Bo Liu,

Xiuli Bi,

Bin Xiao; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Qinghui and Zhang, Haifeng and Qin, Qiao and Liu, Bo and Bi, Xiuli and Xiao, Bin}, title = {Diversity over Uniformity: Rethinking Representation in Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40407-40417} }
MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks: Lirong Che,

Shuo Wen,

Shan Huang,

Chuang Wang,

Yuzhe Yang,

Gregory Dudek,

Xueqian Wang,

Jian Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR, author = {Che, Lirong and Wen, Shuo and Huang, Shan and Wang, Chuang and Yang, Yuzhe and Dudek, Gregory and Wang, Xueqian and Su, Jian}, title = {MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37175-37185} }
See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding: Boyuan Sun,

Bo-Wen Yin,

Yuan-Ming Li,

Xihan Wei,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Boyuan and Yin, Bo-Wen and Li, Yuan-Ming and Wei, Xihan and Hou, Qibin}, title = {See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36814-36827} }
VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding: Xueqing Yu,

Bohan Li,

Yan Li,

Zhenheng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Xueqing and Li, Bohan and Li, Yan and Yang, Zhenheng}, title = {VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40581-40590} }
VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling: Weiqi Li,

Quande Zhang,

Ruifeng Zhai,

Liang Lin,

Guangrun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Weiqi and Zhang, Quande and Zhai, Ruifeng and Lin, Liang and Wang, Guangrun}, title = {VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35025-35035} }
ResCa: Residual Caching for Diffusion Transformers Acceleration: Haipeng Fang,

Yu Li,

Fan Tang,

Yixing Lu,

Juan Cao,

Sheng Tang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Haipeng and Li, Yu and Tang, Fan and Lu, Yixing and Cao, Juan and Tang, Sheng}, title = {ResCa: Residual Caching for Diffusion Transformers Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32957-32966} }
RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation: Jinming Chai,

Lingling Li,

Licheng Jiao,

Xiaoqiang Lu,

Long Sun,

Xu Liu,

Wenping Ma,

Weibin Li; [pdf] [supp]
[bibtex]
@InProceedings{Chai_2026_CVPR, author = {Chai, Jinming and Li, Lingling and Jiao, Licheng and Lu, Xiaoqiang and Sun, Long and Liu, Xu and Ma, Wenping and Li, Weibin}, title = {RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42213-42224} }
Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent: Wenliang Zhong,

Rob Barton,

Lucas Goncalves,

Kushal Kumar,

Feng Jiang,

Hehuan Ma,

Yuzhi Guo,

Vidit Bansal,

Karim Bouyarmane,

Junzhou Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Wenliang and Barton, Rob and Goncalves, Lucas and Kumar, Kushal and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Bansal, Vidit and Bouyarmane, Karim and Huang, Junzhou}, title = {Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33749-33760} }
GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space: Yonghao Zhao,

Yupeng Gao,

Jian Yang,

Jin Xie,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yonghao and Gao, Yupeng and Yang, Jian and Xie, Jin and Wang, Beibei}, title = {GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40896-40906} }
IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion: Lizhou Lin,

Songpengcheng Xia,

Zengyuan Lai,

Lan Sun,

Jiarui Yang,

Ling Pei; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Lizhou and Xia, Songpengcheng and Lai, Zengyuan and Sun, Lan and Yang, Jiarui and Pei, Ling}, title = {IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42901-42910} }
DSO: Direct Steering Optimization for Bias Mitigation: Lucas Monteiro Paes,

Nivedha Sivakumar,

Yinong Oliver Wang,

Masha Fedzechkina,

Barry-John Theobald,

Luca Zappella,

Nicholas Apostoloff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paes_2026_CVPR, author = {Paes, Lucas Monteiro and Sivakumar, Nivedha and Wang, Yinong Oliver and Fedzechkina, Masha and Theobald, Barry-John and Zappella, Luca and Apostoloff, Nicholas}, title = {DSO: Direct Steering Optimization for Bias Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31714-31724} }
Graph Attention Prototypical Network for Robust Few-Shot Classification: Tingyun Liu,

Licheng Liu,

Qibin Zhang,

Qiying Feng,

C. L. Philip Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Tingyun and Liu, Licheng and Zhang, Qibin and Feng, Qiying and Chen, C. L. Philip}, title = {Graph Attention Prototypical Network for Robust Few-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33933-33942} }
Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification: Jinjia Peng,

Jican Tan,

Jiazuo Yu,

Zeze Tao,

Huibing Wang; [pdf]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Jinjia and Tan, Jican and Yu, Jiazuo and Tao, Zeze and Wang, Huibing}, title = {Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32278-32287} }
PnP-CM: Consistency Models as Plug-and-Play Priors for Inverse Problems: Merve Gulle,

Junno Yun,

Yasar Utku Alcalar,

Mehmet Akcakaya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gulle_2026_CVPR,
    author    = {Gulle, Merve and Yun, Junno and Alcalar, Yasar Utku and Akcakaya, Mehmet},
    title     = {{PnP-CM}: Consistency Models as Plug-and-Play Priors for Inverse Problems},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38464--38474},
}
Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework: Enzhuo Zhang,

Sijie Zhao,

Dilxat Muhtar,

Zhenshi Li,

Xueliang Zhang,

Pengfeng Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Enzhuo and Zhao, Sijie and Muhtar, Dilxat and Li, Zhenshi and Zhang, Xueliang and Xiao, Pengfeng},
    title     = {Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38238--38247},
}
Hist2Style: Histogram-Guided Stylization with Bilateral Grids: Dekel Galor,

Adam Pikielny,

Zhoutong Zhang,

Ke Wang,

Laura Waller,

Jiawen Chen,

Ilya Chugunov; [pdf] [supp]
[bibtex]
@InProceedings{Galor_2026_CVPR,
    author    = {Galor, Dekel and Pikielny, Adam and Zhang, Zhoutong and Wang, Ke and Waller, Laura and Chen, Jiawen and Chugunov, Ilya},
    title     = {{Hist2Style}: Histogram-Guided Stylization with Bilateral Grids},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29717--29726},
}
Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding: Jincen Jiang,

Qianyu Zhou,

Yuhang Li,

Kui Su,

Meili Wang,

Jian Chang,

Jian Jun Zhang,

Xuequan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Jincen and Zhou, Qianyu and Li, Yuhang and Su, Kui and Wang, Meili and Chang, Jian and Zhang, Jian Jun and Lu, Xuequan},
    title     = {Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39098--39110},
}
MatchED: Crisp Edge Detection Using End-to-End, Matching-based Supervision: Bedrettin Cetinkaya,

Sinan Kalkan,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cetinkaya_2026_CVPR,
    author    = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre},
    title     = {{MatchED}: Crisp Edge Detection Using End-to-End, Matching-based Supervision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42093--42103},
}
Robust Promptable Video Object Segmentation: Sohyun Lee,

Yeho Gwon,

Lukas Hoyer,

Konrad Schindler,

Christos Sakaridis,

Suha Kwak; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Sohyun and Gwon, Yeho and Hoyer, Lukas and Schindler, Konrad and Sakaridis, Christos and Kwak, Suha},
    title     = {Robust Promptable Video Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39838--39847},
}
Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction: David Novikov,

Eilon Vaknin,

Narek Tumanyan,

Mark Sheinin; [pdf] [arXiv]
[bibtex]
@InProceedings{Novikov_2026_CVPR,
    author    = {Novikov, David and Vaknin, Eilon and Tumanyan, Narek and Sheinin, Mark},
    title     = {Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41784--41793},
}
SAG-GNN: Semantic-Aware Guided GNN for Descriptor-Free 2D-3D Matching: Shihua Zhang,

Tianhao Xu,

Zizhuo Li,

Qing Ma,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Shihua and Xu, Tianhao and Li, Zizhuo and Ma, Qing and Ma, Jiayi},
    title     = {{SAG-GNN}: Semantic-Aware Guided {GNN} for Descriptor-Free {2D-3D} Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31410--31420},
}
Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions: Boyuan Cheng,

Yingjie Xi,

Rui He,

Jinhe Na,

Ying Cao,

Pengjie Wang,

Jian J. Zhang,

Xiaosong Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Boyuan and Xi, Yingjie and He, Rui and Na, Jinhe and Cao, Ying and Wang, Pengjie and Zhang, Jian J. and Yang, Xiaosong},
    title     = {Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38376--38386},
}
Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision: Yizhou Jin,

Yuezhu Feng,

Jinjin Zhang,

Peng Wang,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Yizhou and Feng, Yuezhu and Zhang, Jinjin and Wang, Peng and Liu, Qingjie and Wang, Yunhong},
    title     = {Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43061--43071},
}
DEVA: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks: Debasmit Das,

Munawar Hayat,

Fatih Porikli; [pdf] [supp]
[bibtex]
@InProceedings{Das_2026_CVPR,
    author    = {Das, Debasmit and Hayat, Munawar and Porikli, Fatih},
    title     = {{DEVA}: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39141--39151},
}
MV-Fashion: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data: Hunor Laczkó,

Libang Jia,

Loc-Phat Truong,

Diego Hernández,

Sergio Escalera,

Jordi Gonzalez,

Meysam Madadi; [pdf] [supp]
[bibtex]
@InProceedings{Laczko_2026_CVPR,
    author    = {Laczk{\'o}, Hunor and Jia, Libang and Truong, Loc-Phat and Hern{\'a}ndez, Diego and Escalera, Sergio and Gonzalez, Jordi and Madadi, Meysam},
    title     = {{MV-Fashion}: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42810--42823},
}
UAV-CB: A Complex-Background RGB-T Dataset and Local Frequency Bridge Network for UAV Detection: Shenghui Huang,

Menghao Hu,

Longkun Zou,

Hongyu Chi,

Zekai Li,

Feng Gao,

Fan Yang,

Qingyao Wu,

Ke Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Shenghui and Hu, Menghao and Zou, Longkun and Chi, Hongyu and Li, Zekai and Gao, Feng and Yang, Fan and Wu, Qingyao and Chen, Ke},
    title     = {{UAV-CB}: A Complex-Background {RGB-T} Dataset and Local Frequency Bridge Network for {UAV} Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40468--40478},
}
Generalizable Sparse-View 3D Reconstruction from Unconstrained Images: Vinayak Gupta,

Chih-Hao Lin,

Shenlong Wang,

Anand Bhattad,

Jia-Bin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gupta_2026_CVPR,
    author    = {Gupta, Vinayak and Lin, Chih-Hao and Wang, Shenlong and Bhattad, Anand and Huang, Jia-Bin},
    title     = {Generalizable Sparse-View {3D} Reconstruction from Unconstrained Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33217--33226},
}
MoDES: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping: Yushi Huang,

Zining Wang,

Zhihang Yuan,

Yifu Ding,

Ruihao Gong,

Jinyang Guo,

Xianglong Liu,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Yushi and Wang, Zining and Yuan, Zhihang and Ding, Yifu and Gong, Ruihao and Guo, Jinyang and Liu, Xianglong and Zhang, Jun},
    title     = {{MoDES}: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30205--30215},
}
SearchAD: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving: Felix Embacher,

Jonas Uhrig,

Marius Cordts,

Markus Enzweiler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Embacher_2026_CVPR,
    author    = {Embacher, Felix and Uhrig, Jonas and Cordts, Marius and Enzweiler, Markus},
    title     = {{SearchAD}: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33088--33098},
}
Space-Time Forecasting of Dynamic Scenes with Motion-aware Gaussian Grouping: Junmyeong Lee,

Hoseung Choi,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Junmyeong and Choi, Hoseung and Cho, Minsu},
    title     = {Space-Time Forecasting of Dynamic Scenes with Motion-aware {Gaussian} Grouping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41033--41043},
}
Nonlinear Color Transfer via Learnable Bezier Flows: Junhyoung Lee,

Seongwoon Jo,

JeongHun Park,

Yeonji Ryou,

Jeongha Yang,

Jangho Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Junhyoung and Jo, Seongwoon and Park, JeongHun and Ryou, Yeonji and Yang, Jeongha and Kim, Jangho},
    title     = {Nonlinear Color Transfer via Learnable {Bezier} Flows},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41741--41751},
}
DreamOmni2: Multimodal Instruction-based Generation and Editing: Bin Xia,

Bohao Peng,

Yuechen Zhang,

Junjia Huang,

Jiyang Liu,

Jingyao Li,

Haoru Tan,

Sitong Wu,

Chengyao Wang,

Yitong Wang,

Bei Yu,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Bin and Peng, Bohao and Zhang, Yuechen and Huang, Junjia and Liu, Jiyang and Li, Jingyao and Tan, Haoru and Wu, Sitong and Wang, Chengyao and Wang, Yitong and Yu, Bei and Jia, Jiaya},
    title     = {{DreamOmni2}: Multimodal Instruction-based Generation and Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29275--29284},
}
WiseEdit: Benchmarking Cognition- and Creativity-Informed Image Editing: Kaihang Pan,

Weile Chen,

Haiyi Qiu,

Qifan Yu,

Wendong Bu,

Zehan Wang,

Yun Zhu,

Juncheng Li,

Siliang Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Kaihang and Chen, Weile and Qiu, Haiyi and Yu, Qifan and Bu, Wendong and Wang, Zehan and Zhu, Yun and Li, Juncheng and Tang, Siliang},
    title     = {{WiseEdit}: Benchmarking Cognition- and Creativity-Informed Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37268--37278},
}
Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in dLLMs for Edit-Friendly Hard Prompt Inversion: Naresh Kumar Devulapally,

Shruti Agarwal,

Vishal Asnani,

Vishnu Suresh Lokhande; [pdf] [supp]
[bibtex]
@InProceedings{Devulapally_2026_CVPR,
    author    = {Devulapally, Naresh Kumar and Agarwal, Shruti and Asnani, Vishal and Lokhande, Vishnu Suresh},
    title     = {Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in {dLLMs} for Edit-Friendly Hard Prompt Inversion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43290--43299},
}
Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence: Haozhe Chen,

Rui Li,

Zhengbao Wang,

Xinhao Zhu,

Linjie Li,

Tianyu Xiong,

Xuan Ouyang,

Jiaqi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Haozhe and Li, Rui and Wang, Zhengbao and Zhu, Xinhao and Li, Linjie and Xiong, Tianyu and Ouyang, Xuan and Yang, Jiaqi},
    title     = {Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31389--31399},
}
LifeEval: A Multimodal Benchmark for Assistive AI in Egocentric Daily Life Tasks: Hengjian Gao,

Kaiwei Zhang,

Shibo Wang,

Mingjie Chen,

Qihang Cao,

Xianfeng Wang,

Yucheng Zhu,

Xiongkuo Min,

Wei Sun,

Dandan Zhu,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Hengjian and Zhang, Kaiwei and Wang, Shibo and Chen, Mingjie and Cao, Qihang and Wang, Xianfeng and Zhu, Yucheng and Min, Xiongkuo and Sun, Wei and Zhu, Dandan and Zhai, Guangtao},
    title     = {{LifeEval}: A Multimodal Benchmark for Assistive {AI} in Egocentric Daily Life Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32892--32902},
}
Decision Boundary-aware Generation for Long-tailed Learning: Jiacheng Yang,

Ruichi Zhang,

Chikai Shang,

Mengke Li,

Xinyi Shang,

Junlong Gao,

Yonggang Zhang,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Jiacheng and Zhang, Ruichi and Shang, Chikai and Li, Mengke and Shang, Xinyi and Gao, Junlong and Zhang, Yonggang and Lu, Yang},
    title     = {Decision Boundary-aware Generation for Long-tailed Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29441--29450},
}
Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors: Peiyu Yang,

Naveed Akhtar,

Jiantong Jiang,

Ajmal Mian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Peiyu and Akhtar, Naveed and Jiang, Jiantong and Mian, Ajmal},
    title     = {Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38979--38989},
}
DeCo: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation: Zehong Ma,

Longhui Wei,

Shuai Wang,

Shiliang Zhang,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Zehong and Wei, Longhui and Wang, Shuai and Zhang, Shiliang and Tian, Qi},
    title     = {{DeCo}: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43600--43610},
}
QuadSync: Quadrifocal Tensor Synchronization via Tucker Decomposition: Daniel Miao,

Gilad Lerman,

Joe Kileel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2026_CVPR,
    author    = {Miao, Daniel and Lerman, Gilad and Kileel, Joe},
    title     = {{QuadSync}: Quadrifocal Tensor Synchronization via {Tucker} Decomposition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28786--28795},
}
RealVLG-R1: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation: Linfei Li,

Lin Zhang,

Ying Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Linfei and Zhang, Lin and Shen, Ying},
    title     = {{RealVLG-R1}: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42396--42407},
}
Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling: Minh-Tuan Tran,

Xuan-May Le,

Quan Hung Tran,

Mehrtash Harandi,

Dinh Phung,

Trung Le; [pdf] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
    author    = {Tran, Minh-Tuan and Le, Xuan-May and Tran, Quan Hung and Harandi, Mehrtash and Phung, Dinh and Le, Trung},
    title     = {Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37982--37991},
}
LA-Pose: Latent Action Pretraining Meets Pose Estimation: Zhengqing Wang,

Saurabh Nair,

Prajwal Chidananda,

Pujith Kachana,

Samuel Li,

Matthew Brown,

Yasutaka Furukawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zhengqing and Nair, Saurabh and Chidananda, Prajwal and Kachana, Pujith and Li, Samuel and Brown, Matthew and Furukawa, Yasutaka},
    title     = {{LA-Pose}: Latent Action Pretraining Meets Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34460--34469},
}
Motion-Aware Animatable Gaussian Avatars Deblurring: Muyao Niu,

Yifan Zhan,

Qingtian Zhu,

Zhuoxiao Li,

Wei Wang,

Zhihang Zhong,

Xiao Sun,

Yinqiang Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
    author    = {Niu, Muyao and Zhan, Yifan and Zhu, Qingtian and Li, Zhuoxiao and Wang, Wei and Zhong, Zhihang and Sun, Xiao and Zheng, Yinqiang},
    title     = {Motion-Aware Animatable {Gaussian} Avatars Deblurring},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40140--40151},
}
FastEventDGS: Deformable Gaussian Splatting for Fast Dynamic Scenes from a Single Event Camera: Zijia Dai,

Nico Messikommer,

Rong Zou,

Nikola Zubic,

Davide Scaramuzza,

Laurent Kneip; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
    author    = {Dai, Zijia and Messikommer, Nico and Zou, Rong and Zubic, Nikola and Scaramuzza, Davide and Kneip, Laurent},
    title     = {{FastEventDGS}: Deformable {Gaussian} Splatting for Fast Dynamic Scenes from a Single Event Camera},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29537--29546},
}
Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution: Hao Chen,

Junyang Chen,

Jinshan Pan,

Jiangxin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Hao and Chen, Junyang and Pan, Jinshan and Dong, Jiangxin},
    title     = {Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30584--30594},
}
ACE-Merging: Data-Free Model Merging with Adaptive Covariance Estimation: Bo Xu,

Haotian Wu,

Hehai Lin,

Weiquan Huang,

Beier Zhu,

Yao Shu,

Chengwei Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Bo and Wu, Haotian and Lin, Hehai and Huang, Weiquan and Zhu, Beier and Shu, Yao and Qin, Chengwei},
    title     = {{ACE-Merging}: Data-Free Model Merging with Adaptive Covariance Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29472--29482},
}
Hyperbolic Busemann Neural Networks: Ziheng Chen,

Bernhard Schölkopf,

Nicu Sebe; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Ziheng and Sch{\"o}lkopf, Bernhard and Sebe, Nicu},
    title     = {Hyperbolic {Busemann} Neural Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42029--42038},
}
TTAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events: Jiaxiong Liu,

Zhen Tan,

Jinpu Zhang,

Yi Zhou,

Hui Shen,

Xieyuanli Chen,

Dewen Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jiaxiong and Tan, Zhen and Zhang, Jinpu and Zhou, Yi and Shen, Hui and Chen, Xieyuanli and Hu, Dewen},
    title     = {{TTAPFormer}: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37053--37062},
}
FVAR: Next-Focus Prediction for Visual Autoregressive Modeling: Xiaofan Li,

Chenming Wu,

Yanpeng Sun,

Jiaming Zhou,

Delin Qu,

Yansong Qu,

Weihao Bo,

Haibao Yu,

Dingkang Liang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xiaofan and Wu, Chenming and Sun, Yanpeng and Zhou, Jiaming and Qu, Delin and Qu, Yansong and Bo, Weihao and Yu, Haibao and Liang, Dingkang},
    title     = {{FVAR}: Next-Focus Prediction for Visual Autoregressive Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30391--30401},
}
OLATverse: A Large-scale Real-world Object Dataset with Precise Lighting Control: Xilong Zhou,

Jianchun Chen,

Pramod Rao,

Timo Teufel,

Linjie Lyu,

Tigran Minasian,

Oleksandr Sotnychenko,

Xiao-Xiao Long,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xilong and Chen, Jianchun and Rao, Pramod and Teufel, Timo and Lyu, Linjie and Minasian, Tigran and Sotnychenko, Oleksandr and Long, Xiao-Xiao and Habermann, Marc and Theobalt, Christian},
    title     = {{OLATverse}: A Large-scale Real-world Object Dataset with Precise Lighting Control},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28848--28859},
}
Landscape-Awareness for Geometric View Diffusion Model: Yan-Ting Chen,

Hao-Wei Chen,

Tsu-Ching Hsiao,

Chun-Yi Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yan-Ting and Chen, Hao-Wei and Hsiao, Tsu-Ching and Lee, Chun-Yi},
    title     = {Landscape-Awareness for Geometric View Diffusion Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38475--38486},
}
One Algorithm to Align Them All: Boyi Pang,

Savva Ignatyev,

Vladimir Ippolitov,

Ramil Khafizov,

Yurii Melnik,

Oleg Voynov,

Maksim Nakhodnov,

Aibek Alanov,

Xiaopeng Fan,

Peter Wonka,

Evgeny Burnaev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2026_CVPR,
    author    = {Pang, Boyi and Ignatyev, Savva and Ippolitov, Vladimir and Khafizov, Ramil and Melnik, Yurii and Voynov, Oleg and Nakhodnov, Maksim and Alanov, Aibek and Fan, Xiaopeng and Wonka, Peter and Burnaev, Evgeny},
    title     = {One Algorithm to Align Them All},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30446--30456},
}
Beyond [CLS] Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection: Changshuo Wang,

Jiangming Wang,

Ke-Yue Zhang,

Taiping Yao,

Shouhong Ding,

Shunli Wang,

Ran Yi,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Changshuo and Wang, Jiangming and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Wang, Shunli and Yi, Ran and Ma, Lizhuang},
    title     = {Beyond {[CLS]} Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42922--42931},
}
Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering: Bowen Yuan,

Sisi You,

Bing-Kun Bao; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
    author    = {Yuan, Bowen and You, Sisi and Bao, Bing-Kun},
    title     = {Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29610--29619},
}
RenderFlow: Single-Step Neural Rendering via Flow Matching: Shenghao Zhang,

Runtao Liu,

Christopher Schroers,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Shenghao and Liu, Runtao and Schroers, Christopher and Zhang, Yang},
    title     = {{RenderFlow}: Single-Step Neural Rendering via Flow Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40602--40611},
}
RDF-MIG: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection: Zian Cao,

Wei Wei,

Qingshan Gao,

Yuanyuan Fu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
    author    = {Cao, Zian and Wei, Wei and Gao, Qingshan and Fu, Yuanyuan},
    title     = {{RDF-MIG}: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35758--35767},
}
iLRM: An Iterative Large 3D Reconstruction Model: Gyeongjin Kang,

Seungtae Nam,

Seungkwon Yang,

Xiangyu Sun,

Sameh Khamis,

Abdelrahman Mohamed,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
    author    = {Kang, Gyeongjin and Nam, Seungtae and Yang, Seungkwon and Sun, Xiangyu and Khamis, Sameh and Mohamed, Abdelrahman and Park, Eunbyung},
    title     = {{iLRM}: An Iterative Large {3D} Reconstruction Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37332--37342},
}
Unified Multimodal Models as Auto-Encoders: Zhiyuan Yan,

Kaiqing Lin,

Zongjian Li,

Junyan Ye,

Hui Han,

Haochen Wang,

Zhendong Wang,

Bin Lin,

Hao Li,

Xinyan Xiao,

Jingdong Wang,

Haifeng Wang,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Zhiyuan and Lin, Kaiqing and Li, Zongjian and Ye, Junyan and Han, Hui and Wang, Haochen and Wang, Zhendong and Lin, Bin and Li, Hao and Xiao, Xinyan and Wang, Jingdong and Wang, Haifeng and Yuan, Li},
    title     = {Unified Multimodal Models as Auto-Encoders},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41903--41912},
}
Test-Time 3D Occupancy Prediction: Fengyi Zhang,

Xiangyu Sun,

Huitong Yang,

Zheng Zhang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Fengyi and Sun, Xiangyu and Yang, Huitong and Zhang, Zheng and Huang, Zi and Luo, Yadan},
    title     = {Test-Time {3D} Occupancy Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35691--35701},
}
DGS: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning: Kai Li,

Jiafeng Li,

Lianghua He,

Ying Wen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Kai and Li, Jiafeng and He, Lianghua and Wen, Ying},
    title     = {{DGS}: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32267--32277},
}
Robo-SGG: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation: Changsheng Lv,

Zijian Fu,

Mengshi Qi; [pdf] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
    author    = {Lv, Changsheng and Fu, Zijian and Qi, Mengshi},
    title     = {{Robo-SGG}: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39282--39292},
}
Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport: Shuo Han,

Xu Tang,

Jingjing Ma,

Xiangrong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Shuo and Tang, Xu and Ma, Jingjing and Zhang, Xiangrong},
    title     = {Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36989--36998},
}
Adapting Lightweight Image-based Counting Models for Video Crowd Counting: Weibo Shu,

Antoni B. Chan; [pdf] [supp]
[bibtex]
@InProceedings{Shu_2026_CVPR,
    author    = {Shu, Weibo and Chan, Antoni B.},
    title     = {Adapting Lightweight Image-based Counting Models for Video Crowd Counting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35227--35237},
}
Spot The Ball: A Benchmark for Visual Social Inference: Neha Balamurugan,

Sarah Wu,

Cristobal Eyzaguirre,

Tobias Gerstenberg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Balamurugan_2026_CVPR,
    author    = {Balamurugan, Neha and Wu, Sarah and Eyzaguirre, Cristobal and Gerstenberg, Tobias},
    title     = {Spot The Ball: A Benchmark for Visual Social Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30919--30928},
}
VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models: Xinlei Yu,

Chengming Xu,

Guibin Zhang,

Zhangquan Chen,

Yudong Zhang,

Yongbo He,

Peng-Tao Jiang,

Jiangning Zhang,

Xiaobin Hu,

Shuicheng Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Xinlei and Xu, Chengming and Zhang, Guibin and Chen, Zhangquan and Zhang, Yudong and He, Yongbo and Jiang, Peng-Tao and Zhang, Jiangning and Hu, Xiaobin and Yan, Shuicheng},
    title     = {{VisMem}: Latent Vision Memory Unlocks Potential of Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31544--31555},
}
Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery: Wei He,

Xianghan Meng,

Zhiyuan Huang,

Xianbiao Qi,

Rong Xiao,

Chun-Guang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Wei and Meng, Xianghan and Huang, Zhiyuan and Qi, Xianbiao and Xiao, Rong and Li, Chun-Guang},
    title     = {Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39637--39646},
}
4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models: Yiting Lu,

Wei Luo,

Peiyan Tu,

Haoran Li,

Hanxin Zhu,

Zihao Yu,

Xingrui Wang,

Xinyi Chen,

Xinge Peng,

Xin Li,

Zhibo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Yiting and Luo, Wei and Tu, Peiyan and Li, Haoran and Zhu, Hanxin and Yu, Zihao and Wang, Xingrui and Chen, Xinyi and Peng, Xinge and Li, Xin and Chen, Zhibo}, title = {4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34322-34332} }
TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer: Cheng Fang,

Zimu Zhou,

Ke Ma,

Bin Guo; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Cheng and Zhou, Zimu and Ma, Ke and Guo, Bin}, title = {TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37010-37021} }
URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images: Ri Su,

Zhao Chen,

Caleb Chen Cao,

Lei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR, author = {Su, Ri and Chen, Zhao and Cao, Caleb Chen and Chen, Lei}, title = {URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32777-32786} }
FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection: Yunlong Zhao,

Xiaoheng Deng,

Hongyan Xu,

Zhuohua Qiu,

Xiaowen Hu,

Shan You,

Yi Chen,

Chang Xu,

Xiu Su; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yunlong and Deng, Xiaoheng and Xu, Hongyan and Qiu, Zhuohua and Hu, Xiaowen and You, Shan and Chen, Yi and Xu, Chang and Su, Xiu}, title = {FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39425-39434} }
Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding: Yutao Tang,

Cheng Zhao,

Gaurav Mittal,

Rohith Kukkala,

Rama Chellappa,

Cheng Peng,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Yutao and Zhao, Cheng and Mittal, Gaurav and Kukkala, Rohith and Chellappa, Rama and Peng, Cheng and Chen, Mei}, title = {Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38584-38594} }
ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos: Luigi Seminara,

Davide Moltisanti,

Antonino Furnari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seminara_2026_CVPR, author = {Seminara, Luigi and Moltisanti, Davide and Furnari, Antonino}, title = {ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31240-31249} }
A Mixed Diet Makes DINO An Omnivorous Vision Encoder: Rishabh Kabra,

Maks Ovsjanikov,

Drew A. Hudson,

Ye Xia,

Skanda Koppula,

Andre Araujo,

Joao Carreira,

Niloy J. Mitra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kabra_2026_CVPR, author = {Kabra, Rishabh and Ovsjanikov, Maks and Hudson, Drew A. and Xia, Ye and Koppula, Skanda and Araujo, Andre and Carreira, Joao and Mitra, Niloy J.}, title = {A Mixed Diet Makes DINO An Omnivorous Vision Encoder}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36850-36860} }
HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision: Zihang Chen,

Zhu Liu,

Changbo Yan,

Jinyuan Liu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Zihang and Liu, Zhu and Yan, Changbo and Liu, Jinyuan and Liu, Risheng}, title = {HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37434-37444} }
VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues: Sirnam Swetha,

Rohit Gupta,

Parth Parag Kulkarni,

David G Shatwell,

Jeffrey A Chan Santiago,

Nyle Siddiqui,

Joseph Fioresi,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Swetha_2026_CVPR, author = {Swetha, Sirnam and Gupta, Rohit and Kulkarni, Parth Parag and Shatwell, David G and Chan Santiago, Jeffrey A and Siddiqui, Nyle and Fioresi, Joseph and Shah, Mubarak}, title = {VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32840-32849} }
ESAM++: Efficient Online 3D Perception on the Edge: Qin Liu,

Lavisha Aggarwal,

Saptarashmi Bandyopadhyay,

Vikas Bahirwani,

Marc Niethammer,

Ehsan Adeli,

Andrea Colaco; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Qin and Aggarwal, Lavisha and Bandyopadhyay, Saptarashmi and Bahirwani, Vikas and Niethammer, Marc and Adeli, Ehsan and Colaco, Andrea}, title = {ESAM++: Efficient Online 3D Perception on the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39021-39030} }
Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization: Hanchao Liu,

Fang-Lue Zhang,

Shining Zhang,

Tai-Jiang Mu,

Shi-Min Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Hanchao and Zhang, Fang-Lue and Zhang, Shining and Mu, Tai-Jiang and Hu, Shi-Min}, title = {Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38291-38301} }
CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration: Xiefan Guo,

Xinzhu Ma,

Haiyu Zhang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Xiefan and Ma, Xinzhu and Zhang, Haiyu and Huang, Di}, title = {CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43558-43567} }
StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation: Ke Xing,

Longfei Li,

Yuyang Yin,

Hanwen Liang,

Guixun Luo,

Chen Fang,

Jue Wang,

Konstantinos N. Plataniotis,

Xiaojie Jin,

Yao Zhao,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR, author = {Xing, Ke and Li, Longfei and Yin, Yuyang and Liang, Hanwen and Luo, Guixun and Fang, Chen and Wang, Jue and Plataniotis, Konstantinos N. and Jin, Xiaojie and Zhao, Yao and Wei, Yunchao}, title = {StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40245-40255} }
Low-Rank Test-Time Training for Pre-Trained Point Cloud Models: Ouyangzi Ye,

Feifei Shao,

Kexin Li,

Yawei Luo,

Zikai Song,

Ping Liu,

Fengda Zhang,

Hongwei Wang,

Jun Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Ouyangzi and Shao, Feifei and Li, Kexin and Luo, Yawei and Song, Zikai and Liu, Ping and Zhang, Fengda and Wang, Hongwei and Xiao, Jun}, title = {Low-Rank Test-Time Training for Pre-Trained Point Cloud Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31472-31481} }
AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection: Haolin Li,

Yaohua Wang,

Ze Yan,

Lijie Wen,

Biqing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Haolin and Wang, Yaohua and Yan, Ze and Wen, Lijie and Huang, Biqing}, title = {AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41626-41635} }
Pixel2Phys: Distilling Governing Laws from Visual Dynamics: Ruikun Li,

Jun Yao,

Yingfan Hua,

Shixiang Tang,

Biqing Qi,

Bin Liu,

Wanli Ouyang,

Yan Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Ruikun and Yao, Jun and Hua, Yingfan and Tang, Shixiang and Qi, Biqing and Liu, Bin and Ouyang, Wanli and Lu, Yan}, title = {Pixel2Phys: Distilling Governing Laws from Visual Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41426-41435} }
Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models: Tianci Bi,

Xiaoyi Zhang,

Yan Lu,

Nanning Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR, author = {Bi, Tianci and Zhang, Xiaoyi and Lu, Yan and Zheng, Nanning}, title = {Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43310-43319} }
EVLF: Early Vision-Language Fusion for Generative Dataset Distillation: Wenqi Cai,

Yawen Zou,

Guang Li,

Chunzhi Gu,

Chao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR, author = {Cai, Wenqi and Zou, Yawen and Li, Guang and Gu, Chunzhi and Zhang, Chao}, title = {EVLF: Early Vision-Language Fusion for Generative Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33953-33962} }
UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models: Hewen Pan,

Cong Wei,

Dashuang Liang,

Zepeng Huang,

Pengfei Gao,

Ziqi Zhou,

Lulu Xue,

Pengfei Yan,

Xiaoming Wei,

Minghui Li,

Shengshan Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Hewen and Wei, Cong and Liang, Dashuang and Huang, Zepeng and Gao, Pengfei and Zhou, Ziqi and Xue, Lulu and Yan, Pengfei and Wei, Xiaoming and Li, Minghui and Hu, Shengshan}, title = {UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31833-31844} }
Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models: Hyundong Jin,

Dongyoon Han,

Eunwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR, author = {Jin, Hyundong and Han, Dongyoon and Kim, Eunwoo}, title = {Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32288-32298} }
SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting: Xiang Feng,

Xiangbo Wang,

Tieshi Zhong,

Chengkai Wang,

Yiting Zhao,

Tianxiang Xu,

Zhenzhong Kuang,

Feiwei Qin,

Xuefei Yin,

Yanming Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Xiang and Wang, Xiangbo and Zhong, Tieshi and Wang, Chengkai and Zhao, Yiting and Xu, Tianxiang and Kuang, Zhenzhong and Qin, Feiwei and Yin, Xuefei and Zhu, Yanming}, title = {SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33384-33393} }
WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation: Wenhua Wu,

Huai Guan,

Zhe Liu,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Wenhua and Guan, Huai and Liu, Zhe and Wang, Hesheng}, title = {WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40949-40958} }
TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising: Junyoung Park,

Youngjin Oh,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR, author = {Park, Junyoung and Oh, Youngjin and Cho, Nam Ik}, title = {TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29877-29886} }
SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs: Koonting Yip,

Qiyan Zhao,

Wenhao Yu,

Liangyu Yuen,

Mingkai Li,

Xiaofeng Zhang,

Jianmin Ji,

Yanyong Zhang,

Qing Jiang,

Ka-Veng Yuen; [pdf] [supp]
[bibtex]
@InProceedings{Yip_2026_CVPR, author = {Yip, Koonting and Zhao, Qiyan and Yu, Wenhao and Yuen, Liangyu and Li, Mingkai and Zhang, Xiaofeng and Ji, Jianmin and Zhang, Yanyong and Jiang, Qing and Yuen, Ka-Veng}, title = {SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33714-33726} }
Forecasting 3D Scanpaths in Egocentric Video: Fiona Ryan,

Ishwarya Ananthabhotla,

Yijun Qian,

Judy Hoffman,

James M. Rehg,

Vamsi Krishna Ithapu,

Calvin Murdock; [pdf] [supp]
[bibtex]
@InProceedings{Ryan_2026_CVPR, author = {Ryan, Fiona and Ananthabhotla, Ishwarya and Qian, Yijun and Hoffman, Judy and Rehg, James M. and Ithapu, Vamsi Krishna and Murdock, Calvin}, title = {Forecasting 3D Scanpaths in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42824-42835} }
SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images: Aayush Dhakal,

Subash Khanal,

Srikumar Sastry,

Jacob Arndt,

Philipe Dias,

Dalton Lunga,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhakal_2026_CVPR, author = {Dhakal, Aayush and Khanal, Subash and Sastry, Srikumar and Arndt, Jacob and Dias, Philipe and Lunga, Dalton and Jacobs, Nathan}, title = {SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35472-35482} }
Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment: Jiayang Wu,

Xinyang Chen,

Ke Lv,

Weili Guan; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Jiayang and Chen, Xinyang and Lv, Ke and Guan, Weili}, title = {Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29347-29356} }
Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals: Xiangyu Fan,

Zesong Qiu,

Zhuguanyu Wu,

Fanzhou Wang,

Zhiqian Lin,

Tianxiang Ren,

Dahua Lin,

Ruihao Gong,

Lei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Xiangyu and Qiu, Zesong and Wu, Zhuguanyu and Wang, Fanzhou and Lin, Zhiqian and Ren, Tianxiang and Lin, Dahua and Gong, Ruihao and Yang, Lei}, title = {Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41667-41676} }
FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning: Weijie Lyu,

Ming-Hsuan Yang,

Zhixin Shu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR, author = {Lyu, Weijie and Yang, Ming-Hsuan and Shu, Zhixin}, title = {FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30607-30617} }
ORION: ORthonormal Text Encoding for Universal VLM AdaptatION: Omprakash Chakraborty,

Jose Dolz,

Ismail Ben Ayed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chakraborty_2026_CVPR, author = {Chakraborty, Omprakash and Dolz, Jose and Ben Ayed, Ismail}, title = {ORION: ORthonormal Text Encoding for Universal VLM AdaptatION}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31556-31565} }
Mobile-VTON: High-Fidelity On-Device Virtual Try-On: Zhenchen Wan,

Ce Chen,

Runqi Lin,

Jiaxin Huang,

Tianxi Chen,

Yanwu Xu,

Tongliang Liu,

Mingming Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR, author = {Wan, Zhenchen and Chen, Ce and Lin, Runqi and Huang, Jiaxin and Chen, Tianxi and Xu, Yanwu and Liu, Tongliang and Gong, Mingming}, title = {Mobile-VTON: High-Fidelity On-Device Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38081-38090} }
RINO: Rotation-Invariant Non-Rigid Correspondences: Maolin Gao,

Shao Jie Hu-Chen,

Congyue Deng,

Riccardo Marin,

Leonidas Guibas,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Maolin and Hu-Chen, Shao Jie and Deng, Congyue and Marin, Riccardo and Guibas, Leonidas and Cremers, Daniel}, title = {RINO: Rotation-Invariant Non-Rigid Correspondences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34683-34693} }
IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation: Hao Wu,

Xiangyang Luo,

Hao Wang,

Jiawei Zhang,

Yi Zhang,

Jinwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Hao and Luo, Xiangyang and Wang, Hao and Zhang, Jiawei and Zhang, Yi and Wang, Jinwei}, title = {IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32967-32977} }
DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration: Jiayi Li,

Yuxin Yao,

Qiuhang Lu,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Yao, Yuxin and Lu, Qiuhang and Zhang, Juyong}, title = {DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39031-39041} }
AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments: Xuzhi Wang,

Xinran Wu,

Song Wang,

Lingdong Kong,

Ziping Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping}, title = {AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34148-34159} }
TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding: Boshen Xu,

Zihan Xiao,

Jiaze Li,

Jianzhong Ju,

Zhenbo Luo,

Jian Luan,

Qin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Boshen and Xiao, Zihan and Li, Jiaze and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian and Jin, Qin}, title = {TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40479-40493} }
EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement: Wenjing Tang,

Chuanguang Yang,

Zhulin An,

Libo Huang,

Boyu Diao,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Wenjing and Yang, Chuanguang and An, Zhulin and Huang, Libo and Diao, Boyu and Xu, Yongjun}, title = {EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33738-33748} }
MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene: Wenjie Mu,

Zhan Li,

Chuanzhou Su,

Xuanyi Shen,

Ziniu Liu,

Fan Lu,

Yujian Mo,

Junqiao Zhao,

Tiantian Feng,

Chen Ye,

Guang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2026_CVPR, author = {Mu, Wenjie and Li, Zhan and Su, Chuanzhou and Shen, Xuanyi and Liu, Ziniu and Lu, Fan and Mo, Yujian and Zhao, Junqiao and Feng, Tiantian and Ye, Chen and Chen, Guang}, title = {MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38049-38059} }
VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction: Sinan Du,

Jiahao Guo,

Bo Li,

Shuhao Cui,

Zhengzhuo Xu,

Yifu Luo,

Yongxian Wei,

Kun Gai,

Xinggang Wang,

Kai Wu,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR, author = {Du, Sinan and Guo, Jiahao and Li, Bo and Cui, Shuhao and Xu, Zhengzhuo and Luo, Yifu and Wei, Yongxian and Gai, Kun and Wang, Xinggang and Wu, Kai and Yuan, Chun}, title = {VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30322-30334} }
Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models: Shufan Li,

Jiuxiang Gu,

Kangning Liu,

Zhe Lin,

Zijun Wei,

Aditya Grover,

Jason Kuen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Shufan and Gu, Jiuxiang and Liu, Kangning and Lin, Zhe and Wei, Zijun and Grover, Aditya and Kuen, Jason}, title = {Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36103-36114} }
FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration: Congcong Bian,

Haolong Ma,

Hui Li,

Zhongwei Shen,

Xiaoqing Luo,

Xiaoning Song,

Xiao-jun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2026_CVPR, author = {Bian, Congcong and Ma, Haolong and Li, Hui and Shen, Zhongwei and Luo, Xiaoqing and Song, Xiaoning and Wu, Xiao-jun}, title = {FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41551-41561} }
Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats: Wenxuan Wang,

Chenglei Wang,

Chengzhi Yan,

Xuelin Qian,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Wenxuan and Wang, Chenglei and Yan, Chengzhi and Qian, Xuelin and Zhang, Yanning}, title = {Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34908-34917} }
H^2A^2: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection: Tao Xie,

Tao An,

Feng Liu,

Wensheng Jin,

Zhengyu Li,

Lijun Zhao,

Ruifeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Tao and An, Tao and Liu, Feng and Jin, Wensheng and Li, Zhengyu and Zhao, Lijun and Li, Ruifeng}, title = {{H$^2$A$^2$}: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40717-40726} }
Mirai: Autoregressive Visual Generation Needs Foresight: Yonghao Yu,

Lang Huang,

Zerun Wang,

Runyi Li,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Yonghao and Huang, Lang and Wang, Zerun and Li, Runyi and Yamasaki, Toshihiko}, title = {Mirai: Autoregressive Visual Generation Needs Foresight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30511-30520} }
EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories: Lu Wei,

Yuta Nakashima,

Noa Garcia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Lu and Nakashima, Yuta and Garcia, Noa}, title = {EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37247-37257} }
Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors: Zhangxing Bian,

Shuwen Wei,

Samuel W. Remedios,

Junyu Chen,

Aaron Carass,

Blake Dewey,

Jerry L Prince; [pdf] [arXiv]
[bibtex]
@InProceedings{Bian_2026_CVPR, author = {Bian, Zhangxing and Wei, Shuwen and Remedios, Samuel W. and Chen, Junyu and Carass, Aaron and Dewey, Blake and Prince, Jerry L}, title = {Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41730-41740} }
Spatiotemporal Pyramid Flow Matching for Climate Emulation: Jeremy A. Irvin,

Jiaqi Han,

Zikui Wang,

Abdulaziz Alharbi,

Yufei Zhao,

Nomin-Erdene Bayarsaikhan,

Daniele Visioni,

Andrew Y. Ng,

Duncan Watson-Parris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Irvin_2026_CVPR, author = {Irvin, Jeremy A. and Han, Jiaqi and Wang, Zikui and Alharbi, Abdulaziz and Zhao, Yufei and Bayarsaikhan, Nomin-Erdene and Visioni, Daniele and Ng, Andrew Y. and Watson-Parris, Duncan}, title = {Spatiotemporal Pyramid Flow Matching for Climate Emulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42180-42190} }
Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors: Puria Azadi Moghadam,

Ali Khajegili Mirabadi,

Behnam Maneshgar,

Hossein Farahani,

Ali Bashashati; [pdf] [supp]
[bibtex]
@InProceedings{Moghadam_2026_CVPR, author = {Moghadam, Puria Azadi and Mirabadi, Ali Khajegili and Maneshgar, Behnam and Farahani, Hossein and Bashashati, Ali}, title = {Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34095-34105} }
Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views: Kunwar Maheep Singh,

Jianchun Chen,

Vladislav Golyanik,

Stephan J. Garbin,

Thabo Beeler,

Rishabh Dabral,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR, author = {Singh, Kunwar Maheep and Chen, Jianchun and Golyanik, Vladislav and Garbin, Stephan J. and Beeler, Thabo and Dabral, Rishabh and Habermann, Marc and Theobalt, Christian}, title = {Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28882-28892} }
Match-and-Fuse: Consistent Generation from Unstructured Image Sets: Kate Feingold,

Omri Kaduri,

Tali Dekel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feingold_2026_CVPR, author = {Feingold, Kate and Kaduri, Omri and Dekel, Tali}, title = {Match-and-Fuse: Consistent Generation from Unstructured Image Sets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30490-30499} }
Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation: Yaowen Chang,

Zhen Cao,

Xu Zheng,

Xiaoxin Mi,

Zhen Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2026_CVPR, author = {Chang, Yaowen and Cao, Zhen and Zheng, Xu and Mi, Xiaoxin and Dong, Zhen}, title = {Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32226-32235} }
tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction: Chen Wang,

Hao Tan,

Wang Yifan,

Zhiqin Chen,

Yuheng Liu,

Kalyan Sunkavalli,

Sai Bi,

Lingjie Liu,

Yiwei Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Chen and Tan, Hao and Wang, Yifan and Chen, Zhiqin and Liu, Yuheng and Sunkavalli, Kalyan and Bi, Sai and Liu, Lingjie and Hu, Yiwei}, title = {tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36582-36592} }
Designing Instance-Level Sampling Schedules via REINFORCE with James-Stein Shrinkage: Peiyu Yu,

Suraj Kothawade,

Sirui Xie,

Ying Nian Wu,

Hongliang Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Peiyu and Kothawade, Suraj and Xie, Sirui and Wu, Ying Nian and Fei, Hongliang},
  title     = {Designing Instance-Level Sampling Schedules via {REINFORCE} with {James-Stein} Shrinkage},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36040--36050},
}
MVInverse: Feed-forward Multiview Inverse Rendering in Seconds: Xiangzuo Wu,

Chengwei Ren,

Jun Zhou,

Xiu Li,

Yuan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiangzuo and Ren, Chengwei and Zhou, Jun and Li, Xiu and Liu, Yuan},
  title     = {{MVInverse}: Feed-forward Multiview Inverse Rendering in Seconds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37343--37357},
}
MM-ACT: Learn from Multimodal Parallel Generation to Act: Haotian Liang,

Xinyi Chen,

Bin Wang,

Mingkang Chen,

Yitian Liu,

Yuhao Zhang,

Zanxin Chen,

Tianshuo Yang,

Yilun Chen,

Jiangmiao Pang,

Dong Liu,

Xiaokang Yang,

Yao Mu,

Wenqi Shao,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Haotian and Chen, Xinyi and Wang, Bin and Chen, Mingkang and Liu, Yitian and Zhang, Yuhao and Chen, Zanxin and Yang, Tianshuo and Chen, Yilun and Pang, Jiangmiao and Liu, Dong and Yang, Xiaokang and Mu, Yao and Shao, Wenqi and Luo, Ping},
  title     = {{MM-ACT}: Learn from Multimodal Parallel Generation to Act},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35080--35090},
}
CountGD++: Generalized Prompting for Open-World Counting: Niki Amini-Naieni,

Andrew Zisserman; [pdf] [supp]
[bibtex]
@InProceedings{Amini-Naieni_2026_CVPR,
  author    = {Amini-Naieni, Niki and Zisserman, Andrew},
  title     = {{CountGD++}: Generalized Prompting for Open-World Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37725--37734},
}
Interpretable Debiasing of Vision-Language Models for Social Fairness: Na Min An,

Yoonna Jang,

Yusuke Hirota,

Ryo Hachiuma,

Isabelle Augenstein,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Na Min and Jang, Yoonna and Hirota, Yusuke and Hachiuma, Ryo and Augenstein, Isabelle and Shim, Hyunjung},
  title     = {Interpretable Debiasing of Vision-Language Models for Social Fairness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39326--39337},
}
D3D-VLP: Dynamic 3D Vision-Language-Planning Model for Embodied Grounding and Navigation: Zihan Wang,

Seungjun Lee,

Guangzhao Dai,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihan and Lee, Seungjun and Dai, Guangzhao and Lee, Gim Hee},
  title     = {{D3D-VLP}: Dynamic {3D} Vision-Language-Planning Model for Embodied Grounding and Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32463--32474},
}
Property-Informed Diffusion-Based Text-to-Microstructure Generation: Bingxuan Dai,

Hongsong Wang,

Jie Gui; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Bingxuan and Wang, Hongsong and Gui, Jie},
  title     = {Property-Informed Diffusion-Based Text-to-Microstructure Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36758--36768},
}
Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance: Liangyu Yuan,

Yufei Huang,

Mingkun Lei,

Tong Zhao,

Ruoyu Wang,

Changxi Chi,

Yiwei Wang,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Liangyu and Huang, Yufei and Lei, Mingkun and Zhao, Tong and Wang, Ruoyu and Chi, Changxi and Wang, Yiwei and Zhang, Chi},
  title     = {Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43697--43706},
}
IDperturb: Enhancing Variation in Synthetic Face Generation via Angular Perturbations: Fadi Boutros,

Eduarda Caldeira,

Tahar Chettaoui,

Naser Damer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boutros_2026_CVPR,
  author    = {Boutros, Fadi and Caldeira, Eduarda and Chettaoui, Tahar and Damer, Naser},
  title     = {{IDperturb}: Enhancing Variation in Synthetic Face Generation via Angular Perturbations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40119--40129},
}
ReFlow: Self-correction Motion Learning for Dynamic Scene Reconstruction: Yanzhe Liang,

Ruijie Zhu,

Hanzhi Chang,

Zhuoyuan Li,

Jiahao Lu,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yanzhe and Zhu, Ruijie and Chang, Hanzhi and Li, Zhuoyuan and Lu, Jiahao and Zhang, Tianzhu},
  title     = {{ReFlow}: Self-correction Motion Learning for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29803--29813},
}
Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective: Maijie Deng,

Yuhua Li,

Yixiong Zou,

Yao Wu,

Chenru Ma; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Maijie and Li, Yuhua and Zou, Yixiong and Wu, Yao and Ma, Chenru},
  title     = {Is Bin Generation Indispensable? {A} Bin-Generation-Free Dataset Quantization via Semantic Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33642--33651},
}
SimRecon: SimReady Compositional Scene Reconstruction from Real Videos: Chong Xia,

Kai Zhu,

Zizhuo Wang,

Fangfu Liu,

Zhizheng Zhang,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Zhu, Kai and Wang, Zizhuo and Liu, Fangfu and Zhang, Zhizheng and Duan, Yueqi},
  title     = {{SimRecon}: {SimReady} Compositional Scene Reconstruction from Real Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42452--42463},
}
From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras: Taehun Ryu,

Changwoo Kang,

Kyungdon Joo; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2026_CVPR,
  author    = {Ryu, Taehun and Kang, Changwoo and Joo, Kyungdon},
  title     = {From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29557--29566},
}
See and Fix the Flaws: Enabling VLMs and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis: Jaehyun Park,

Minyoung Ahn,

Minkyu Kim,

Jonghyun Lee,

Jae-Gil Lee,

Dongmin Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jaehyun and Ahn, Minyoung and Kim, Minkyu and Lee, Jonghyun and Lee, Jae-Gil and Park, Dongmin},
  title     = {See and Fix the Flaws: Enabling {VLMs} and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35810--35820},
}
DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior: Junjia Huang,

Binbin Yang,

Pengxiang Yan,

Jiyang Liu,

Bin Xia,

Zhao Wang,

Yitong Wang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Junjia and Yang, Binbin and Yan, Pengxiang and Liu, Jiyang and Xia, Bin and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin},
  title     = {{DreamShot}: Personalized Storyboard Synthesis with Video Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36736--36746},
}
Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with MeasureBench: Fenfen Lin,

Yesheng Liu,

Haiyu Xu,

Yue Chen,

Zheqi He,

Mingxuan Zhao,

Miguel Hu Chen,

Jin-Ge Yao,

Xi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Fenfen and Liu, Yesheng and Xu, Haiyu and Chen, Yue and He, Zheqi and Zhao, Mingxuan and Chen, Miguel Hu and Yao, Jin-Ge and Yang, Xi},
  title     = {Do Vision-Language Models Measure Up? {Benchmarking} Visual Measurement Reading with {MeasureBench}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38544--38553},
}
Compressed-Domain-Aware Online Video Super-Resolution: Yuhang Wang,

Hai Li,

Shujuan Hou,

Zhetao Dong,

Xiaoyao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuhang and Li, Hai and Hou, Shujuan and Dong, Zhetao and Yang, Xiaoyao},
  title     = {Compressed-Domain-Aware Online Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33621--33631},
}
SVBench: Evaluation of Video Generation Models on Social Reasoning: Wenshuo Peng,

Gongxuan Wang,

Tianmeng Yang,

Chuanhao Li,

Xiaojie Xu,

Hui He,

Kaipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Wenshuo and Wang, Gongxuan and Yang, Tianmeng and Li, Chuanhao and Xu, Xiaojie and He, Hui and Zhang, Kaipeng},
  title     = {{SVBench}: Evaluation of Video Generation Models on Social Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32872--32881},
}
TESO: Online Tracking of Essential Matrix by Stochastic Optimization: Jaroslav Moravec,

Radim Sara,

Akihiro Sugimoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moravec_2026_CVPR,
  author    = {Moravec, Jaroslav and Sara, Radim and Sugimoto, Akihiro},
  title     = {{TESO}: Online Tracking of Essential Matrix by Stochastic Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28819--28827},
}
AGENTSAFE: Benchmarking the Safety of Embodied Agents on Hazardous Instructions: Zonghao Ying,

Le Wang,

Yisong Xiao,

Jiakai Wang,

Yuqing Ma,

Jinyang Guo,

Zhenfei Yin,

Mingchuan Zhang,

Aishan Liu,

Xianglong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2026_CVPR,
  author    = {Ying, Zonghao and Wang, Le and Xiao, Yisong and Wang, Jiakai and Ma, Yuqing and Guo, Jinyang and Yin, Zhenfei and Zhang, Mingchuan and Liu, Aishan and Liu, Xianglong},
  title     = {{AGENTSAFE}: Benchmarking the Safety of Embodied Agents on Hazardous Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37664--37673},
}
DiT-Distill: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge: Xin Jiang,

Hao Tang,

Meiqi Cao,

Junyao Gao,

Fei Shen,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xin and Tang, Hao and Cao, Meiqi and Gao, Junyao and Shen, Fei and Li, Zechao},
  title     = {{DiT-Distill}: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38753--38762},
}
Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation: Yiming Li,

Sisi You,

Bing-Kun Bao; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiming and You, Sisi and Bao, Bing-Kun},
  title     = {Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32206--32215},
}
GP-4DGS: Probabilistic 4D Gaussian Splatting from Monocular Video via Variational Gaussian Processes: Mijeong Kim,

Jungtaek Kim,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Mijeong and Kim, Jungtaek and Han, Bohyung},
  title     = {{GP-4DGS}: Probabilistic {4D} {Gaussian} Splatting from Monocular Video via Variational {Gaussian} Processes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33394--33403},
}
Language-guided Frequency Modulation for Large Vision-Language Models: Shuyi Ouyang,

Gongfan Fang,

Xinyin Ma,

Yen-Wei Chen,

Lanfen Lin,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Shuyi and Fang, Gongfan and Ma, Xinyin and Chen, Yen-Wei and Lin, Lanfen and Wang, Xinchao},
  title     = {Language-guided Frequency Modulation for Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39175--39185},
}
Masked Representation Modeling for Domain-Adaptive Segmentation: Wenlve Zhou,

Zhiheng Zhou,

Tiantao Xian,

Yikui Zhai,

Weibin Wu,

Biyun MA; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Wenlve and Zhou, Zhiheng and Xian, Tiantao and Zhai, Yikui and Wu, Weibin and MA, Biyun},
  title     = {Masked Representation Modeling for Domain-Adaptive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36999--37009},
}
Hear What Matters! Text-conditioned Selective Video-to-Audio Generation: Junwon Lee,

Juhan Nam,

Jiyoung Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junwon and Nam, Juhan and Lee, Jiyoung},
  title     = {Hear What Matters! {Text-conditioned} Selective Video-to-Audio Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36680--36690},
}
Exploring 6D Object Pose Estimation with Deformation: Zhiqiang Liu,

Rui Song,

Duanmu Chuangqi,

Jiaojiao Li,

David Ferstl,

Yinlin Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhiqiang and Song, Rui and Chuangqi, Duanmu and Li, Jiaojiao and Ferstl, David and Hu, Yinlin},
  title     = {Exploring {6D} Object Pose Estimation with Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33078--33087},
}
DRS-GUI: Dynamic Region Search for Training-Free GUI Grounding: Yichao Liu,

Huawen Shen,

Liu Yu,

Shiyu Liu,

Zeyu Chen,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yichao and Shen, Huawen and Yu, Liu and Liu, Shiyu and Chen, Zeyu and Zhou, Yu},
  title     = {{DRS-GUI}: Dynamic Region Search for Training-Free {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34606--34616},
}
DiverseDiT: Towards Diverse Representation Learning in Diffusion Transformers: Mengping Yang,

Zhiyu Tan,

Binglei Li,

Xiaomeng Yang,

Hesen Chen,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Mengping and Tan, Zhiyu and Li, Binglei and Yang, Xiaomeng and Chen, Hesen and Li, Hao},
  title     = {{DiverseDiT}: Towards Diverse Representation Learning in Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40591--40601},
}
SCIEval: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models: Guanghui Ye,

Huan Zhao,

Zhixue Zhao,

Tengfei Ma,

Kehan Wang,

Steffen Eger,

Zhihua Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Guanghui and Zhao, Huan and Zhao, Zhixue and Ma, Tengfei and Wang, Kehan and Eger, Steffen and Jiang, Zhihua},
  title     = {{SCIEval}: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29760--29770},
}
SemLayer: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons: Haiyang Xu,

Ronghuan Wu,

Li-Yi Wei,

Nanxuan Zhao,

Chenxi Liu,

Cuong Nguyen,

Zhuowen Tu,

Zhaowen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Haiyang and Wu, Ronghuan and Wei, Li-Yi and Zhao, Nanxuan and Liu, Chenxi and Nguyen, Cuong and Tu, Zhuowen and Wang, Zhaowen},
  title     = {{SemLayer}: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42082--42092},
}
Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment: Lu Niu,

Cheng Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Lu and Xue, Cheng},
  title     = {Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41646--41656},
}
GH-NAF: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware CBCT: Seong Je Oh,

Ju Hwan Lee,

Chae Yeon Lim,

Donghwan Lee,

Myung Jin Chung,

Kyungsu Kim; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Seong Je and Lee, Ju Hwan and Lim, Chae Yeon and Lee, Donghwan and Chung, Myung Jin and Kim, Kyungsu},
  title     = {{GH-NAF}: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware {CBCT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41762--41772},
}
ActionMesh: Animated 3D Mesh Generation with Temporal 3D Diffusion: Remy Sabathier,

David Novotny,

Niloy J. Mitra,

Tom Monnier; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sabathier_2026_CVPR,
  author    = {Sabathier, Remy and Novotny, David and Mitra, Niloy J. and Monnier, Tom},
  title     = {{ActionMesh}: Animated {3D} Mesh Generation with Temporal {3D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34312--34321},
}
Towards Intrinsic-Aware Monocular 3D Object Detection: Zhihao Zhang,

Abhinav Kumar,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhihao and Kumar, Abhinav and Liu, Xiaoming},
  title     = {Towards Intrinsic-Aware Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40739--40750},
}
Multimodal RewardBench 2: Evaluating Omni Reward Models for Interleaved Text and Image: Yushi Hu,

Reyhane Askari-Hemmat,

Melissa Hall,

Emily Dinan,

Luke Zettlemoyer,

Marjan Ghazvininejad; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yushi and Askari-Hemmat, Reyhane and Hall, Melissa and Dinan, Emily and Zettlemoyer, Luke and Ghazvininejad, Marjan},
  title     = {Multimodal {RewardBench} 2: Evaluating Omni Reward Models for Interleaved Text and Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36904--36915},
}
Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep: Tianyi Liu,

Ye Lu,

Linfeng Zhang,

Chen Cai,

Jianjun Gao,

Yi Wang,

Kim-Hui Yap,

Lap-Pui Chau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tianyi and Lu, Ye and Zhang, Linfeng and Cai, Chen and Gao, Jianjun and Wang, Yi and Yap, Kim-Hui and Chau, Lap-Pui},
  title     = {Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35800--35809},
}
Plug-and-Play PDE Optimization for 3D Gaussian Splatting: Toward High-Quality Rendering and Reconstruction: Yifan Mo,

Youcheng Cai,

Ligang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2026_CVPR,
  author    = {Mo, Yifan and Cai, Youcheng and Liu, Ligang},
  title     = {Plug-and-Play {PDE} Optimization for {3D} {Gaussian} Splatting: Toward High-Quality Rendering and Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33333--33342},
}
UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition: Zhuangcheng Gu,

Guang Liang,

Bin Wang,

Zhiyuan Zhao,

Qintong Zhang,

Weijia Li,

Chao Xu,

Bo Zhang,

Botian Shi,

Jiang Wu,

Wentao Zhang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zhuangcheng and Liang, Guang and Wang, Bin and Zhao, Zhiyuan and Zhang, Qintong and Li, Weijia and Xu, Chao and Zhang, Bo and Shi, Botian and Wu, Jiang and Zhang, Wentao and He, Conghui},
  title     = {{UniMERNet}: A Universal Network for Real-World Mathematical Expression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34106--34115},
}
Fourier Angle Alignment for Oriented Object Detection in Remote Sensing: Changyu Gu,

Linwei Chen,

Lin Gu,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Changyu and Chen, Linwei and Gu, Lin and Fu, Ying},
  title     = {Fourier Angle Alignment for Oriented Object Detection in Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42225--42235},
}
SimScale: Learning to Drive via Real-World Simulation at Scale: Haochen Tian,

Tianyu Li,

Haochen Liu,

Jiazhi Yang,

Yihang Qiu,

Guang Li,

Junli Wang,

Yinfeng Gao,

Zhang Zhang,

Liang Wang,

Hangjun Ye,

Long Chen,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Haochen and Li, Tianyu and Liu, Haochen and Yang, Jiazhi and Qiu, Yihang and Li, Guang and Wang, Junli and Gao, Yinfeng and Zhang, Zhang and Wang, Liang and Ye, Hangjun and Chen, Long and Li, Hongyang},
  title     = {{SimScale}: Learning to Drive via Real-World Simulation at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36365--36374},
}
Unified Customized Generation by Disentangled Reward Modeling: Shaojin Wu,

Mengqi Huang,

Yufeng Cheng,

Wenxu Wu,

Jiahe Tian,

Yiming Luo,

Fei Ding,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Shaojin and Huang, Mengqi and Cheng, Yufeng and Wu, Wenxu and Tian, Jiahe and Luo, Yiming and Ding, Fei and He, Qian},
  title     = {Unified Customized Generation by Disentangled Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34418--34427},
}
AeroGS: Scale-Aware Gaussian Splatting for Pose-Free Dynamic UAV Scene Reconstruction: Tingyun Li,

Xinyi Liu,

Yongjun Zhang,

Yi Wan,

Xiaoan Liu,

Weiwei Fan,

Jiahao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tingyun and Liu, Xinyi and Zhang, Yongjun and Wan, Yi and Liu, Xiaoan and Fan, Weiwei and Liu, Jiahao},
  title     = {{AeroGS}: Scale-Aware {Gaussian} Splatting for Pose-Free Dynamic {UAV} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40907--40917},
}
Learning to Control Physically-simulated 3D Characters via Generating and Mimicking 2D Motions: Jianan Li,

Xiao Chen,

Tao Huang,

Tien-Tsin Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jianan and Chen, Xiao and Huang, Tao and Wong, Tien-Tsin},
  title     = {Learning to Control Physically-simulated {3D} Characters via Generating and Mimicking {2D} Motions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38302--38312},
}
DuoMo: Dual Motion Diffusion for World-Space Human Reconstruction: Yufu Wang,

Evonne Ng,

Soyong Shin,

Rawal Khirodkar,

Yuan Dong,

Zhaoen Su,

Jinhyung Park,

Kris Kitani,

Alexander Richard,

Fabian Prada,

Michael Zollhöfer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yufu and Ng, Evonne and Shin, Soyong and Khirodkar, Rawal and Dong, Yuan and Su, Zhaoen and Park, Jinhyung and Kitani, Kris and Richard, Alexander and Prada, Fabian and Zollh{\"o}fer, Michael},
  title     = {{DuoMo}: Dual Motion Diffusion for World-Space Human Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42777--42788},
}
Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models: Jianrong Zhang,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jianrong and Fan, Hehe and Yang, Yi},
  title     = {Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30650--30660},
}
TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration: Haowei Zhu,

Tingxuan Huang,

Xing Wang,

Tianyu Zhao,

Jiexi Wang,

Weifeng Chen,

Xurui Peng,

Fangmin Chen,

Junhai Yong,

Bin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Haowei and Huang, Tingxuan and Wang, Xing and Zhao, Tianyu and Wang, Jiexi and Chen, Weifeng and Peng, Xurui and Chen, Fangmin and Yong, Junhai and Wang, Bin},
  title     = {{TAP}: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36082--36091},
}
Debiased Sample Selection for Learning with Noisy Labels: Weiran Pan,

Wei Wei,

Wenfeng Xie; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Weiran and Wei, Wei and Xie, Wenfeng},
  title     = {Debiased Sample Selection for Learning with Noisy Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32047--32057},
}
SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras: Huanjing Yue,

Shangbin Xie,

Cong Cao,

Qian Wu,

Lei Zhang,

Lei Zhao,

Jingyu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Huanjing and Xie, Shangbin and Cao, Cong and Wu, Qian and Zhang, Lei and Zhao, Lei and Yang, Jingyu},
  title     = {{SpiralDiff}: Spiral Diffusion with {LoRA} for {RGB-to-RAW} Conversion Across Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38453--38463},
}
Inter-Photon-Limited Videography: Andrew Xie,

Dongyu Du,

Sotiris Nousias,

David B. Lindell,

Kiriakos N. Kutulakos; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Andrew and Du, Dongyu and Nousias, Sotiris and Lindell, David B. and Kutulakos, Kiriakos N.},
  title     = {Inter-Photon-Limited Videography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34006--34015},
}
GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis: Xuqin Wang,

Tao Wu,

Yanfeng Zhang,

Lu Liu,

Mingwei Sun,

Yongliang Wang,

Niclas Zeller,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuqin and Wu, Tao and Zhang, Yanfeng and Liu, Lu and Sun, Mingwei and Wang, Yongliang and Zeller, Niclas and Cremers, Daniel},
  title     = {{GeodesicNVS}: Probability Density Geodesic Flow Matching for Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40316--40326},
}
InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection: Xiaofan Que,

Dingrong Wang,

Xumin Liu,

Qi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Que_2026_CVPR,
  author    = {Que, Xiaofan and Wang, Dingrong and Liu, Xumin and Yu, Qi},
  title     = {{InsCal}: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36936--36946},
}
Mixture of States: Routing Token-Level Dynamics for Multimodal Generation: Haozhe Liu,

Ding Liu,

Mingchen Zhuge,

Zijian Zhou,

Tian Xie,

Sen He,

Yukang Yang,

Shuming Liu,

Yuren Cong,

Jiadong Guo,

Hongyu Xu,

Ke Xu,

Kam-Woh Ng,

Juan C. Perez,

Juan-Manuel Perez-Rua,

Tao Xiang,

Wei Liu,

Shikun Liu,

Jürgen Schmidhuber; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haozhe and Liu, Ding and Zhuge, Mingchen and Zhou, Zijian and Xie, Tian and He, Sen and Yang, Yukang and Liu, Shuming and Cong, Yuren and Guo, Jiadong and Xu, Hongyu and Xu, Ke and Ng, Kam-Woh and Perez, Juan C. and Perez-Rua, Juan-Manuel and Xiang, Tao and Liu, Wei and Liu, Shikun and Schmidhuber, J{\"u}rgen},
  title     = {Mixture of States: Routing Token-Level Dynamics for Multimodal Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36781--36792},
}
Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection: Yujin Lee,

Sewon Kim,

Daeun Moon,

Seoyoon Jang,

Hyunsoo Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yujin and Kim, Sewon and Moon, Daeun and Jang, Seoyoon and Yoon, Hyunsoo},
  title     = {Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35577--35586},
}
LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation: Haoyu Ji,

Xueting Liu,

Yu Gao,

Wenze Huang,

Zhihao Yang,

Weihong Ren,

Zhiyong Wang,

Honghai Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Haoyu and Liu, Xueting and Gao, Yu and Huang, Wenze and Yang, Zhihao and Ren, Weihong and Wang, Zhiyong and Liu, Honghai},
  title     = {{LaDy}: {Lagrangian}-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34449--34459},
}
GenTract: Generative Global Tractography: Alec Sargood,

Lemuel Puglisi,

Elinor Thompson,

Mirco Musolesi,

Daniel C. Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargood_2026_CVPR,
  author    = {Sargood, Alec and Puglisi, Lemuel and Thompson, Elinor and Musolesi, Mirco and Alexander, Daniel C.},
  title     = {{GenTract}: Generative Global Tractography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35291--35300},
}
ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval: Tianyu Yang,

ChenWei He,

Xiangzhao Hao,

Tianyue Wang,

Jiarui Guo,

Haiyun Guo,

Leigang Qu,

Jinqiao Wang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Tianyu and He, ChenWei and Hao, Xiangzhao and Wang, Tianyue and Guo, Jiarui and Guo, Haiyun and Qu, Leigang and Wang, Jinqiao and Chua, Tat-Seng},
  title     = {{ReCALL}: Recalibrating Capability Degradation for {MLLM}-based Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38763--38773},
}
Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training: Jinbo Xing,

Zeyinzi Jiang,

Yuxiang Tuo,

Chaojie Mao,

Xiaotang Gai,

Xi Chen,

Jingfeng Zhang,

Yulin Pan,

Zhen Han,

Jie Xiao,

Keyu Yan,

Chenwei Xie,

Chongyang Zhong,

Kai Zhu,

Tong Shen,

Lianghua Huang,

Yu Liu,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Jinbo and Jiang, Zeyinzi and Tuo, Yuxiang and Mao, Chaojie and Gai, Xiaotang and Chen, Xi and Zhang, Jingfeng and Pan, Yulin and Han, Zhen and Xiao, Jie and Yan, Keyu and Xie, Chenwei and Zhong, Chongyang and Zhu, Kai and Shen, Tong and Huang, Lianghua and Liu, Yu and Yang, Yujiu},
  title     = {{Wan-Weaver}: Interleaved Multi-modal Generation via Decoupled Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36267--36278},
}
TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models: Qianlong Xiang,

Miao Zhang,

Haoyu Zhang,

Kun Wang,

Junhui Hou,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Qianlong and Zhang, Miao and Zhang, Haoyu and Wang, Kun and Hou, Junhui and Nie, Liqiang},
  title     = {{TINA}: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30076--30086},
}
MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification: Jiahao Xu,

Sheng Huang,

Xin Zhang,

Zhixiong Nan,

Jiajun Dong,

Nankun Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiahao and Huang, Sheng and Zhang, Xin and Nan, Zhixiong and Dong, Jiajun and Mu, Nankun},
  title     = {{MUSE}: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33911--33921},
}
Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection: Zhanhe Lei,

Zhongyuan Wang,

Jikang Cheng,

Baojin Huang,

Yuhong Yang,

Zhen Han,

Chao Liang,

Dengpan Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Zhanhe and Wang, Zhongyuan and Cheng, Jikang and Huang, Baojin and Yang, Yuhong and Han, Zhen and Liang, Chao and Ye, Dengpan},
  title     = {Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41436--41445},
}
WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation: Rafi Ibn Sultan,

Hui Zhu,

Xiangyu Zhou,

Chengyin Li,

Prashant Khanduri,

Marco Brocanelli,

Dongxiao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ibn_Sultan_2026_CVPR,
  author    = {Ibn Sultan, Rafi and Zhu, Hui and Zhou, Xiangyu and Li, Chengyin and Khanduri, Prashant and Brocanelli, Marco and Zhu, Dongxiao},
  title     = {{WalkGPT}: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40085--40095},
}
DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting: Gaowei Zhang,

Lihe Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Gaowei and Zhang, Lihe},
  title     = {{DLVP-CLIP}: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35524--35533},
}
Minimal Constraint Relaxation for Multiview Autocalibration: Norio Kosaka,

Timothy Duff,

Tomas Pajdla; [pdf] [supp]
[bibtex]
@InProceedings{Kosaka_2026_CVPR,
  author    = {Kosaka, Norio and Duff, Timothy and Pajdla, Tomas},
  title     = {Minimal Constraint Relaxation for Multiview Autocalibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28937--28946},
}
SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer: Tong Shao,

Yusen Fu,

Guoying Sun,

Jingde Kong,

Zhuotao Tian,

Jingyong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Tong and Fu, Yusen and Sun, Guoying and Kong, Jingde and Tian, Zhuotao and Su, Jingyong},
  title     = {{SODA}: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33012--33021},
}
PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video: Suyi Jiang,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Suyi and Lee, Gim Hee},
  title     = {{PhysHO}: Physics-Based Dynamic {3D} {Gaussian} Human and Object from Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32507--32517},
}
CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering: Mingfang Zhang,

Jingjing Pan,

Ashutosh Kumar,

Rajat Saini,

Mustafa Erdogan,

Hsuan-Kung Yang,

Caixin Kang,

Yifei Huang,

Yoichi Sato,

Quan Kong; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mingfang and Pan, Jingjing and Kumar, Ashutosh and Saini, Rajat and Erdogan, Mustafa and Yang, Hsuan-Kung and Kang, Caixin and Huang, Yifei and Sato, Yoichi and Kong, Quan},
  title     = {{CaST-Bench}: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31856--31866},
}
TTRV: Test-Time Reinforcement Learning for Vision Language Models: Akshit Singh,

Shyam Marjit,

Wei Lin,

Paul Gavrikov,

Serena Yeung-Levy,

Hilde Kuehne,

Rogerio Feris,

Sivan Doveh,

James Glass,

M. Jehanzeb Mirza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Akshit and Marjit, Shyam and Lin, Wei and Gavrikov, Paul and Yeung-Levy, Serena and Kuehne, Hilde and Feris, Rogerio and Doveh, Sivan and Glass, James and Mirza, M. Jehanzeb},
  title     = {{TTRV}: Test-Time Reinforcement Learning for Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33153--33163},
}
CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling: Tingjia Zhang,

Bo Chen,

Shengzhong Liu,

Fan Wu,

Guihai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tingjia and Chen, Bo and Liu, Shengzhong and Wu, Fan and Chen, Guihai},
  title     = {{CaT-GS}: Efficient {3DGS} Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37391--37400},
}
SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence: Haoning Wu,

Xiao Huang,

Yaohui Chen,

Ya Zhang,

Yanfeng Wang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haoning and Huang, Xiao and Chen, Yaohui and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {{SpatialScore}: Towards Comprehensive Evaluation for Spatial Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31029--31041},
}
Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis: Kang He,

Yuzhe Ding,

Xinrong Wang,

Fei Li,

Chong Teng,

Donghong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Kang and Ding, Yuzhe and Wang, Xinrong and Li, Fei and Teng, Chong and Ji, Donghong},
  title     = {Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30183--30193},
}
SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation: Shuai Tan,

Biao Gong,

Yujie Wei,

Shiwei Zhang,

Zhuoxin Liu,

Ke Ma,

Yan Wang,

Kecheng Zheng,

Xing Zhu,

Yujun Shen,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Shuai and Gong, Biao and Wei, Yujie and Zhang, Shiwei and Liu, Zhuoxin and Ma, Ke and Wang, Yan and Zheng, Kecheng and Zhu, Xing and Shen, Yujun and Zhao, Hengshuang},
  title     = {{SynMotion}: Semantic-Visual Adaptation for Motion Customized Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30477--30489},
}
EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision: Jiahao Chen,

Zihui Zhang,

Yafei Yang,

Jinxi Li,

Shenxing Wei,

Zhixuan Sun,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiahao and Zhang, Zihui and Yang, Yafei and Li, Jinxi and Wei, Shenxing and Sun, Zhixuan and Yang, Bo},
  title     = {{EvObj}: Learning Evolving Object-centric Representations for {3D} Instance Segmentation without Scene Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39817--39826},
}
Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection: Bo Sun,

Junxi Chen,

Zhe Wu,

Feng Gao,

Fan Yang,

Li Su,

Yaowei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Bo and Chen, Junxi and Wu, Zhe and Gao, Feng and Yang, Fan and Su, Li and Wang, Yaowei},
  title     = {Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35638--35647},
}
Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast: Fan Yang,

Yuanzhi Zhao,

Haimei Zhao,

Yudong Zhao,

Haikun Xu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Fan and Zhao, Yuanzhi and Zhao, Haimei and Zhao, Yudong and Xu, Haikun},
  title     = {Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30151--30161},
}
Grounding Everything in Tokens for Multimodal Large Language Models: Xiangxuan Ren,

Zhongdao Wang,

Liping Hou,

Pin Tang,

Guoqing Wang,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Xiangxuan and Wang, Zhongdao and Hou, Liping and Tang, Pin and Wang, Guoqing and Ma, Chao},
  title     = {Grounding Everything in Tokens for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41171--41181},
}
Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models: Zitong Huang,

Kaidong Zhang,

Yukang Ding,

Chao Gao,

Rui Ding,

Ying Chen,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zitong and Zhang, Kaidong and Ding, Yukang and Gao, Chao and Ding, Rui and Chen, Ying and Zuo, Wangmeng},
  title     = {Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35998--36008},
}
Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding: Shumeng Li,

Jintao Guo,

Jian Zhang,

Yulin Zhou,

Luyang Cao,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shumeng and Guo, Jintao and Zhang, Jian and Zhou, Yulin and Cao, Luyang and Shi, Yinghuan},
  title     = {Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject {fMRI} Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42722--42731},
}
Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation: Siyu Xu,

Zijian Wang,

Yunke Wang,

Chenghao Xia,

Tao Huang,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Siyu and Wang, Zijian and Wang, Yunke and Xia, Chenghao and Huang, Tao and Xu, Chang},
  title     = {Affordance Field Intervention: Enabling {VLAs} to Escape Memory Traps in Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37206--37215},
}
CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model: Houji Wen,

Jiangyong Yu,

Dawei Yang,

Jun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Houji and Yu, Jiangyong and Yang, Dawei and Li, Jun},
  title     = {{CAR-SAM}: Cross-Attention Reconstruction for Post-Training Quantization of the {Segment Anything Model}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33632--33641},
}
Evidential Neural Radiance Fields: Ruxiao Duan,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Ruxiao and Wong, Alex},
  title     = {Evidential Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28632--28641},
}
ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference: Ali Hojjat,

Janek Haberer,

Sören Pirk,

Olaf Landsiedel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hojjat_2026_CVPR,
  author    = {Hojjat, Ali and Haberer, Janek and Pirk, S{\"o}ren and Landsiedel, Olaf},
  title     = {{ThinkingViT}: Matryoshka Thinking Vision Transformer for Elastic Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41923--41933},
}
Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation: Joonhyung Park,

Hyeongwon Jang,

Joowon Kim,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Joonhyung and Jang, Hyeongwon and Kim, Joowon and Yang, Eunho},
  title     = {Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38091--38100},
}
DreamStyle: A Unified Framework for Video Stylization: Mengtian Li,

Jinshu Chen,

Songtao Zhao,

Wanquan Feng,

Pengqi Tu,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengtian and Chen, Jinshu and Zhao, Songtao and Feng, Wanquan and Tu, Pengqi and He, Qian},
  title     = {{DreamStyle}: A Unified Framework for Video Stylization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36019--36029},
}
Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection: Mianzhao Wang,

Fan Shi,

Xu Cheng,

Chen Jia,

Shengyong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Mianzhao and Shi, Fan and Cheng, Xu and Jia, Chen and Chen, Shengyong},
  title     = {Uncertainty-Aware Modality Fusion for Unaligned {RGB-T} Salient Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41489--41498},
}
Texvent: Asynchronous Event Data Simulation via Text Prompt: Ruofei Wang,

Peiqi Duan,

Ka Chun Cheung,

Simon See,

Boxin Shi,

Renjie Wan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ruofei and Duan, Peiqi and Cheung, Ka Chun and See, Simon and Shi, Boxin and Wan, Renjie},
  title     = {Texvent: Asynchronous Event Data Simulation via Text Prompt},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36375--36384},
}
Coupled Diffusion Sampling for Training-Free Multi-View Image Editing: Hadi Alzayer,

Yunzhi Zhang,

Chen Geng,

Jia-Bin Huang,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alzayer_2026_CVPR,
  author    = {Alzayer, Hadi and Zhang, Yunzhi and Geng, Chen and Huang, Jia-Bin and Wu, Jiajun},
  title     = {Coupled Diffusion Sampling for Training-Free Multi-View Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43686--43696},
}
Video Panels for Long Video Understanding: Lars Doorenbos,

Federico Spurio,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Doorenbos_2026_CVPR,
  author    = {Doorenbos, Lars and Spurio, Federico and Gall, Juergen},
  title     = {Video Panels for Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31293--31303},
}
Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos: Songtao Jiang,

Sibo Song,

Chenyi Zhou,

Yuan Wang,

Ruizhe Chen,

Tongkun Guan,

Ruilin Luo,

Yan Zhang,

Zhihang Tang,

Yuchong Sun,

Hang Zhang,

Zhibo Yang,

Shuai Bai,

Junyang Lin,

Zuozhu Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Songtao and Song, Sibo and Zhou, Chenyi and Wang, Yuan and Chen, Ruizhe and Guan, Tongkun and Luo, Ruilin and Zhang, Yan and Tang, Zhihang and Sun, Yuchong and Zhang, Hang and Yang, Zhibo and Bai, Shuai and Lin, Junyang and Liu, Zuozhu},
  title     = {Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31283--31292},
}
Understanding, Accelerating, and Improving MeanFlow Training: Jin-Young Kim,

Hyojun Go,

Lea Bogensperger,

Julius Erbach,

Nikolai Kalischek,

Federico Tombari,

Konrad Schindler,

Dominik Narnhofer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jin-Young and Go, Hyojun and Bogensperger, Lea and Erbach, Julius and Kalischek, Nikolai and Tombari, Federico and Schindler, Konrad and Narnhofer, Dominik},
  title     = {Understanding, Accelerating, and Improving {MeanFlow} Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37992--38003},
}
VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes: Paul Gavrikov,

Wei Lin,

M. Jehanzeb Mirza,

Soumya Jahagirdar,

Muhammad Huzaifa,

Sivan Doveh,

James Glass,

Serena Yeung-Levy,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gavrikov_2026_CVPR,
  author    = {Gavrikov, Paul and Lin, Wei and Mirza, M. Jehanzeb and Jahagirdar, Soumya and Huzaifa, Muhammad and Doveh, Sivan and Glass, James and Yeung-Levy, Serena and Kuehne, Hilde},
  title     = {{VisualOverload}: Probing Visual Understanding of {VLMs} in Really Dense Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40833--40844},
}
STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation: Hao Ren,

Zetong Bi,

Yiming Zeng,

Zhaoliang Wan,

Lu Qi,

Hui Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Hao and Bi, Zetong and Zeng, Yiming and Wan, Zhaoliang and Qi, Lu and Cheng, Hui},
  title     = {{STRNet}: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42464--42473},
}
From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction: Gaoge Han,

Yongkang Cheng,

Zhe Chen,

Shaoli Huang,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Gaoge and Cheng, Yongkang and Chen, Zhe and Huang, Shaoli and Liu, Tongliang},
  title     = {From {2D} Alignment to {3D} Plausibility: Unifying Heterogeneous {2D} Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42800--42809},
}
Video-CoE: Reinforcing Video Event Prediction via Chain of Events: Qile Su,

Jing Tang,

Rui Chen,

Lei Sun,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Qile and Tang, Jing and Chen, Rui and Sun, Lei and Chu, Xiangxiang},
  title     = {{Video-CoE}: Reinforcing Video Event Prediction via Chain of Events},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32818--32828},
}
FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution: Aro Kim,

Myeongjin Jang,

Chaewon Moon,

Youngjin Shin,

Jinwoo Jeong,

Sang-hyo Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Aro and Jang, Myeongjin and Moon, Chaewon and Shin, Youngjin and Jeong, Jinwoo and Park, Sang-hyo},
  title     = {{FiDeSR}: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38270--38280},
}
QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models: Jingxuan Zhang,

Yunta Hsieh,

Zhongwei Wan,

Haokun Lin,

Xin Wang,

Ziqi Wang,

Yingtie Lei,

Mi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jingxuan and Hsieh, Yunta and Wan, Zhongwei and Lin, Haokun and Wang, Xin and Wang, Ziqi and Lei, Yingtie and Zhang, Mi},
  title     = {{QuantVLA}: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39539--39549},
}
ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications: Yuxi Mi,

Qiuyang Yuan,

Zhizhou Zhong,

Xuan Zhao,

Jiaogen Zhou,

Fubao Zhu,

Jihong Guan,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2026_CVPR,
  author    = {Mi, Yuxi and Yuan, Qiuyang and Zhong, Zhizhou and Zhao, Xuan and Zhou, Jiaogen and Zhu, Fubao and Guan, Jihong and Zhou, Shuigeng},
  title     = {{ImmerIris}: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28838--28847},
}
Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection: Sairam VCR,

Rishabh Lalla,

Aveen Dayal,

Tejal Kulkarni,

Anuj Lalla,

Vineeth N. Balasubramanian,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{VCR_2026_CVPR,
  author    = {VCR, Sairam and Lalla, Rishabh and Dayal, Aveen and Kulkarni, Tejal and Lalla, Anuj and Balasubramanian, Vineeth N. and Khan, Muhammad Haris},
  title     = {Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29431--29440},
}
Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR: Yufeng Zhong,

Lei Chen,

Zhixiong Zeng,

Xuanle Zhao,

Deyang Jiang,

Liming Zheng,

Jing Huang,

Haibo Qiu,

Peng Shi,

Siqi Yang,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Yufeng and Chen, Lei and Zeng, Zhixiong and Zhao, Xuanle and Jiang, Deyang and Zheng, Liming and Huang, Jing and Qiu, Haibo and Shi, Peng and Yang, Siqi and Ma, Lin},
  title     = {Reading or Reasoning? {Format} Decoupled Reinforcement Learning for Document {OCR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33164--33173},
}
$\phi$-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models: Thanh-Dat Truong,

Huu-Thien Tran,

Jackson Cothren,

Bhiksha Raj,

Khoa Luu; [pdf] [supp]
[bibtex]
@InProceedings{Truong_2026_CVPR,
  author    = {Truong, Thanh-Dat and Tran, Huu-Thien and Cothren, Jackson and Raj, Bhiksha and Luu, Khoa},
  title     = {{$\phi$-DPO}: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39922--39934},
}
PhyCo: Learning Controllable Physical Priors for Generative Motion: Sriram Narayanan,

Ziyu Jiang,

Srinivasa Narasimhan,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Narayanan_2026_CVPR,
  author    = {Narayanan, Sriram and Jiang, Ziyu and Narasimhan, Srinivasa and Chandraker, Manmohan},
  title     = {{PhyCo}: Learning Controllable Physical Priors for Generative Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41892--41902},
}
Learning Straight Flows: Variational Flow Matching for Efficient Generation: Chenrui Ma,

Xi Xiao,

Tianyang Wang,

Xiao Wang,

Yanning Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Chenrui and Xiao, Xi and Wang, Tianyang and Wang, Xiao and Shen, Yanning},
  title     = {Learning Straight Flows: Variational Flow Matching for Efficient Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38154--38164},
}
DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation: Fangtai Wu,

Mushui Liu,

Weijie He,

Zhao Wang,

Yunlong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Fangtai and Liu, Mushui and He, Weijie and Wang, Zhao and Yu, Yunlong},
  title     = {{DCoAR}: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29265--29274},
}
Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm: Jingqi Tong,

Yurong Mou,

Hangcheng Li,

Mingzhe Li,

Yongzhuo Yang,

Ming Zhang,

Qiguang Chen,

Tianyi Liang,

Xiaomeng Hu,

Yining Zheng,

Xinchi Chen,

Jun Zhao,

Xuanjing Huang,

Xipeng Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Jingqi and Mou, Yurong and Li, Hangcheng and Li, Mingzhe and Yang, Yongzhuo and Zhang, Ming and Chen, Qiguang and Liang, Tianyi and Hu, Xiaomeng and Zheng, Yining and Chen, Xinchi and Zhao, Jun and Huang, Xuanjing and Qiu, Xipeng},
  title     = {Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41121--41129},
}
ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation: Panwang Pan,

Jingjing Zhao,

Yuchen Lin,

Chenguo Lin,

Chenxin Li,

Hengyu Liu,

Tingting Shen,

Yadong Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Panwang and Zhao, Jingjing and Lin, Yuchen and Lin, Chenguo and Li, Chenxin and Liu, Hengyu and Shen, Tingting and Mu, Yadong},
  title     = {{ID-Crafter}: {VLM}-Grounded Online {RL} for Compositional Multi-Subject Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36627--36637},
}
Order Matters: 3D Shape Generation from Sequential VR Sketches: Yizi Chen,

Sidi Wu,

Tianyi Xiao,

Nina Wiedemann,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yizi and Wu, Sidi and Xiao, Tianyi and Wiedemann, Nina and Landrieu, Loic},
  title     = {Order Matters: {3D} Shape Generation from Sequential {VR} Sketches},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34259--34269},
}
DF^2-VB: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification: Yuena Lin,

Haichun Cai,

Yi Shan,

Hao Wei,

Yongjian Deng,

Zhen Yang,

Gengyu Lyu; [pdf]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yuena and Cai, Haichun and Shan, Yi and Wei, Hao and Deng, Yongjian and Yang, Zhen and Lyu, Gengyu},
  title     = {{DF$^{2}$-VB}: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33859--33868},
}
GeoSURGE: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings: Angel Daruna,

Nicholas Meegan,

Han-Pang Chiu,

Supun Samarasekera,

Rakesh Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Daruna_2026_CVPR,
  author    = {Daruna, Angel and Meegan, Nicholas and Chiu, Han-Pang and Samarasekera, Supun and Kumar, Rakesh},
  title     = {{GeoSURGE}: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41396--41405},
}
Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models: Hulingxiao He,

Zhi Tan,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Hulingxiao and Tan, Zhi and Peng, Yuxin},
  title     = {Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31124--31134},
}
WorldLens: Full-Spectrum Evaluations of Driving World Models in Real World: Ao Liang,

Lingdong Kong,

Tianyi Yan,

Hongsi Liu,

Yu Yang,

Ziqi Huang,

Wei Yin,

Jialong Zuo,

Yixuan Hu,

Dekai Zhu,

Dongyue Lu,

Youquan Liu,

Guangfeng Jiang,

Linfeng Li,

Xiangtai Li,

Long Zhuo,

Lai Xing Ng,

Benoit R. Cottereau,

Changxin Gao,

Liang Pan,

Wei Tsang Ooi,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Ao and Kong, Lingdong and Yan, Tianyi and Liu, Hongsi and Yang, Yu and Huang, Ziqi and Yin, Wei and Zuo, Jialong and Hu, Yixuan and Zhu, Dekai and Lu, Dongyue and Liu, Youquan and Jiang, Guangfeng and Li, Linfeng and Li, Xiangtai and Zhuo, Long and Ng, Lai Xing and Cottereau, Benoit R. and Gao, Changxin and Pan, Liang and Ooi, Wei Tsang and Liu, Ziwei},
  title     = {{WorldLens}: Full-Spectrum Evaluations of Driving World Models in Real World},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36385--36399},
}
DiffGraph: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation: Zhuoling Li,

Hossein Rahmani,

Jiarui Zhang,

Yu Xue,

Majid Mirmehdi,

Jason Kuen,

Jiuxiang Gu,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhuoling and Rahmani, Hossein and Zhang, Jiarui and Xue, Yu and Mirmehdi, Majid and Kuen, Jason and Gu, Jiuxiang and Liu, Jun},
  title     = {{DiffGraph}: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36713--36723},
}
Tracking-Guided 4D Generation: Foundation-Tracker Motion Priors for 3D Model Animation: Su Sun,

Cheng Zhao,

Himangi Mittal,

Gaurav Mittal,

Rohith Kukkala,

Yingjie Victor Chen,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Mittal, Himangi and Mittal, Gaurav and Kukkala, Rohith and Chen, Yingjie Victor and Chen, Mei},
  title     = {Tracking-Guided {4D} Generation: Foundation-Tracker Motion Priors for {3D} Model Animation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40992--41001},
}
Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling: Valter Piedade,

Lalit Manam,

Masashi Yamazaki,

Pedro Miraldo; [pdf] [supp]
[bibtex]
@InProceedings{Piedade_2026_CVPR,
  author    = {Piedade, Valter and Manam, Lalit and Yamazaki, Masashi and Miraldo, Pedro},
  title     = {Revisiting Monocular {SLAM} with Spatio-Temporal Scene Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28925--28936},
}
RobustVisRAG: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations: I-Hsiang Chen,

Yu-Wei Liu,

Tse-Yu Wu,

Yu-Chien Chiang,

Jen-Chieh Yang,

Wei-Ting Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, I-Hsiang and Liu, Yu-Wei and Wu, Tse-Yu and Chiang, Yu-Chien and Yang, Jen-Chieh and Chen, Wei-Ting},
  title     = {{RobustVisRAG}: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40802--40811},
}
Edit2Perceive: Image Editing Diffusion Models Are Strong Dense Perceivers: Yiqing Shi,

Yiren Song,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yiqing and Song, Yiren and Shou, Mike Zheng},
  title     = {{Edit2Perceive}: Image Editing Diffusion Models Are Strong Dense Perceivers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43568--43577},
}
SWIFT: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution: Chao Wang,

Zijin Yang,

Yaofei Wang,

Yuang Qi,

Weiming Zhang,

Nenghai Yu,

Kejiang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chao and Yang, Zijin and Wang, Yaofei and Qi, Yuang and Zhang, Weiming and Yu, Nenghai and Chen, Kejiang},
  title     = {{SWIFT}: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31725--31734},
}
Mapping Networks: Lord Sen,

Shyamapada Mukherjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sen_2026_CVPR,
  author    = {Sen, Lord and Mukherjee, Shyamapada},
  title     = {Mapping Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36215--36223},
}
MotionMaster: Generalizable Text-Driven Motion Generation and Editing: Nan Jiang,

Yunhao Li,

Lexi Pang,

Zimo He,

Siyuan Huang,

Yixin Zhu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Nan and Li, Yunhao and Pang, Lexi and He, Zimo and Huang, Siyuan and Zhu, Yixin},
  title     = {{MotionMaster}: Generalizable Text-Driven Motion Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30629--30639},
}
LATA: Laplacian-Assisted Transductive Adaptation for Conformal Uncertainty in Medical VLMs: Behzad Bozorgtabar,

Dwarikanath Mahapatra,

Sudipta Roy,

Muzammal Naseer,

Imran Razzak,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bozorgtabar_2026_CVPR,
  author    = {Bozorgtabar, Behzad and Mahapatra, Dwarikanath and Roy, Sudipta and Naseer, Muzammal and Razzak, Imran and Ge, Zongyuan},
  title     = {{LATA}: {Laplacian}-Assisted Transductive Adaptation for Conformal Uncertainty in Medical {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36311--36320},
}
Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning: Tingyu Li,

Zheng Sun,

Jingxuan Wei,

Conghui He,

Lijun Wu,

Cheng Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tingyu and Sun, Zheng and Wei, Jingxuan and He, Conghui and Wu, Lijun and Tan, Cheng},
  title     = {Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29357--29366},
}
MORE-STEM: Long-Short MemOry REcall and Spatio-TEmporal Consistency Model for Query-Driven 3D/4D Point Cloud Segmentation: Chade Li,

Haida Feng,

Pengju Zhang,

Yihong Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chade and Feng, Haida and Zhang, Pengju and Wu, Yihong},
  title     = {{MORE-STEM}: Long-Short {MemOry} {REcall} and {Spatio-TEmporal} Consistency Model for Query-Driven {3D/4D} Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31462--31471},
}
BiEvLight: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement: Zishu Yao,

Xiang-Xiang Su,

Shengning Zhou,

Guang-Yong Chen,

Guodong Fan,

Xing Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zishu and Su, Xiang-Xiang and Zhou, Shengning and Chen, Guang-Yong and Fan, Guodong and Chen, Xing},
  title     = {{BiEvLight}: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41541--41550},
}
Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation: Tzu Ling Liu,

Ian Stavness,

Mrigank Rochan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tzu Ling and Stavness, Ian and Rochan, Mrigank},
  title     = {Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31262--31271},
}
RxnCaption: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning: Jiahe Song,

Chuang Wang,

Bowen Jiang,

Yinfan Wang,

Hao Zheng,

Xingjian Wei,

Chengjin Liu,

Rui Nie,

Junyuan Gao,

Jiaxing Sun,

Yubin Wang,

Lijun Wu,

Zhenhua Huang,

Jiang Wu,

Qian Yu,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Jiahe and Wang, Chuang and Jiang, Bowen and Wang, Yinfan and Zheng, Hao and Wei, Xingjian and Liu, Chengjin and Nie, Rui and Gao, Junyuan and Sun, Jiaxing and Wang, Yubin and Wu, Lijun and Huang, Zhenhua and Wu, Jiang and Yu, Qian and He, Conghui},
  title     = {{RxnCaption}: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30864--30873},
}
Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model: Quan Dao,

Dimitris Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dao_2026_CVPR,
  author    = {Dao, Quan and Metaxas, Dimitris},
  title     = {Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33000--33011},
}
VidEoMT: Your ViT is Secretly Also a Video Segmentation Model: Narges Norouzi,

Idil Esen Zulfikar,

Niccolò Cavagnero,

Tommie Kerssies,

Bastian Leibe,

Gijs Dubbelman,

Daan de Geus; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Norouzi_2026_CVPR,
  author    = {Norouzi, Narges and Zulfikar, Idil Esen and Cavagnero, Niccol{\`o} and Kerssies, Tommie and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan},
  title     = {{VidEoMT}: Your {ViT} is Secretly Also a Video Segmentation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35177--35186},
}
HQC-NBV: A Hybrid Quantum-Classical View Planning Approach: Xiaotong Yu,

Chang Wen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xiaotong and Chen, Chang Wen},
  title     = {{HQC-NBV}: A Hybrid Quantum-Classical View Planning Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35091--35100},
}
Multi-view Consistent 3D Gaussian Head Avatars 'without' Multi-view Generation: Aviral Chharia,

Fernando De la Torre; [pdf]
[bibtex]
@InProceedings{Chharia_2026_CVPR,
  author    = {Chharia, Aviral and {De la Torre}, Fernando},
  title     = {Multi-view Consistent {3D} {Gaussian} Head Avatars `without' Multi-view Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40163--40174},
}
D2T2 - Multimodal Automated Planning for Brachytherapy: Lance C. Moore,

Aranyo Mitra,

Ryan Truong,

Karoline Kallis,

Kelly Kisling,

Sandra M. Meyers,

Nuno Vasconcelos; [pdf] [supp]
[bibtex]
@InProceedings{Moore_2026_CVPR,
  author    = {Moore, Lance C. and Mitra, Aranyo and Truong, Ryan and Kallis, Karoline and Kisling, Kelly and Meyers, Sandra M. and Vasconcelos, Nuno},
  title     = {{D2T2} - Multimodal Automated Planning for Brachytherapy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42670--42680},
}
Toward Early Quality Assessment of Text-to-Image Diffusion Models: Huanlei Guo,

Hongxin Wei,

Bingyi Jing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Huanlei and Wei, Hongxin and Jing, Bingyi},
  title     = {Toward Early Quality Assessment of Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38410--38419},
}
Semantic Foam: Unifying Spatial and Semantic Scene Decomposition: Amr Sharafeldin,

Aryan Mikaeili,

Thomas Walker,

Shrisudhan Govindarajan,

Daniel Rebain,

Kwang Moo Yi,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharafeldin_2026_CVPR,
  author    = {Sharafeldin, Amr and Mikaeili, Aryan and Walker, Thomas and Govindarajan, Shrisudhan and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea},
  title     = {Semantic Foam: Unifying Spatial and Semantic Scene Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29814--29823},
}
Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight: Yi Yang,

Xueqi Li,

Yiyang Chen,

Jin Song,

Yihan Wang,

Zipeng Xiao,

Jiadi Su,

You Qiaoben,

Pengfei Liu,

Zhijie Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Li, Xueqi and Chen, Yiyang and Song, Jin and Wang, Yihan and Xiao, Zipeng and Su, Jiadi and Qiaoben, You and Liu, Pengfei and Deng, Zhijie},
  title     = {Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42505--42515},
}
DetectSCI: Toward Object-Guided ROI Reconstruction for High-Resolution Video Snapshot Compressive Imaging: Xingjian Jiang,

Lishun Wang,

Ping Wang,

Xin Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xingjian and Wang, Lishun and Wang, Ping and Yuan, Xin},
  title     = {{DetectSCI}: Toward Object-Guided {ROI} Reconstruction for High-Resolution Video Snapshot Compressive Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41720--41729},
}
Explaining CLIP Zero-shot Predictions Through Concepts: Onat Ozdemir,

Anders Christensen,

Stephan Alaniz,

Zeynep Akata,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ozdemir_2026_CVPR,
  author    = {Ozdemir, Onat and Christensen, Anders and Alaniz, Stephan and Akata, Zeynep and Akbas, Emre},
  title     = {Explaining {CLIP} Zero-shot Predictions Through Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31336--31345},
}
SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection: Sandro Papais,

Lezhou Feng,

Charles Cossette,

Lingting Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Papais_2026_CVPR,
  author    = {Papais, Sandro and Feng, Lezhou and Cossette, Charles and Ge, Lingting},
  title     = {{SToRe3D}: Sparse Token Relevance in {ViTs} for Efficient Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40751--40761},
}
Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge: Minhyeok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34566--34574},
}
SIR: Structured Image Representations for Explainable Robot Learning: Paul Mattes,

Jan Schwab,

Jens Bosch,

Maximilian Xiling Li,

Nils Blank,

Minh-Trung Tang,

Moritz Haberland,

Rudolf Lioutikov; [pdf] [supp]
[bibtex]
@InProceedings{Mattes_2026_CVPR,
  author    = {Mattes, Paul and Schwab, Jan and Bosch, Jens and Li, Maximilian Xiling and Blank, Nils and Tang, Minh-Trung and Haberland, Moritz and Lioutikov, Rudolf},
  title     = {{SIR}: Structured Image Representations for Explainable Robot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42484--42493},
}
Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization: Xinyu Qiu,

Heng Jia,

Zhengwen Zeng,

Shuheng Shen,

Changhua Meng,

Yi Yang,

Linchao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xinyu and Jia, Heng and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yang, Yi and Zhu, Linchao},
  title     = {Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36916--36925},
}
Towards Calibrating Prompt Tuning of Vision-Language Models: Ashshak Sharifdeen,

Fahad Shamshad,

Muhammad Akhtar Munir,

Abhishek Basu,

Mohamed Ismithdeen,

Jeyapriyan Jeyamohan,

Chathurika Silva,

Karthik Nandakumar,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharifdeen_2026_CVPR,
  author    = {Sharifdeen, Ashshak and Shamshad, Fahad and Munir, Muhammad Akhtar and Basu, Abhishek and Ismithdeen, Mohamed and Jeyamohan, Jeyapriyan and Silva, Chathurika and Nandakumar, Karthik and Khan, Muhammad Haris},
  title     = {Towards Calibrating Prompt Tuning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39131--39140},
}
Rethinking Glyph Spatial Information in Font Generation: Peng Su,

Xi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Peng and Yang, Xi},
  title     = {Rethinking Glyph Spatial Information in Font Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29190--29199},
}
Revisiting Multimodal KV Cache Compression: A Frequency-Domain-Guided Outlier-KV-Aware Approach: Yaoxin Yang,

Peng Ye,

Xudong Tan,

Chongjun Tu,

Maosen Zhao,

Jia Hao,

Tao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yaoxin and Ye, Peng and Tan, Xudong and Tu, Chongjun and Zhao, Maosen and Hao, Jia and Chen, Tao},
  title     = {Revisiting Multimodal {KV} Cache Compression: A Frequency-Domain-Guided {Outlier-KV-Aware} Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39550--39560},
}
Dynamic Important Example Mining for Reinforcement Finetuning: Haoru Tan,

Sitong Wu,

Yanfeng Chen,

Shizhen Zhao,

Yang-Tian Sun,

Tianjia Liu,

Chirui Chang,

Shaofeng Zhang,

Samm Sun,

Xiuzhe Wu,

Ruobing Xie,

Xiaojuan Qi; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Haoru and Wu, Sitong and Chen, Yanfeng and Zhao, Shizhen and Sun, Yang-Tian and Liu, Tianjia and Chang, Chirui and Zhang, Shaofeng and Sun, Samm and Wu, Xiuzhe and Xie, Ruobing and Qi, Xiaojuan},
  title     = {Dynamic Important Example Mining for Reinforcement Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41456--41466},
}
SPOT: Spatiotemporal Prompt Optimization for Motion-Stabilized MLLM-Guided Video Segmentation: Jiayi Fan,

Zheyun Qin,

Xiaoming Xi,

Xiushan Nie,

Yilong Yin; [pdf]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Jiayi and Qin, Zheyun and Xi, Xiaoming and Nie, Xiushan and Yin, Yilong},
  title     = {{SPOT}: Spatiotemporal Prompt Optimization for Motion-Stabilized {MLLM}-Guided Video Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32236--32245},
}
Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning: Chi Zhang,

Haibo Qiu,

Qiming Zhang,

Yufei Xu,

Zhixiong Zeng,

Siqi Yang,

Peng Shi,

Lin Ma,

Jing Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Xu, Yufei and Zeng, Zhixiong and Yang, Siqi and Shi, Peng and Ma, Lin and Zhang, Jing},
  title     = {Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41111--41120},
}
Live Interactive Training for Video Segmentation: Xinyu Yang,

Haozheng Yu,

Yihong Sun,

Bharath Hariharan,

Jennifer J. Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xinyu and Yu, Haozheng and Sun, Yihong and Hariharan, Bharath and Sun, Jennifer J.},
  title     = {Live Interactive Training for Video Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39827--39837},
}
Learning Long-term Motion Embeddings for Efficient Kinematics Generation: Nick Stracke,

Kolja Bauer,

Stefan Andreas Baumann,

Miguel Ángel Bautista,

Josh Susskind,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stracke_2026_CVPR,
  author    = {Stracke, Nick and Bauer, Kolja and Baumann, Stefan Andreas and Bautista, Miguel {\'A}ngel and Susskind, Josh and Ommer, Bj{\"o}rn},
  title     = {Learning Long-term Motion Embeddings for Efficient Kinematics Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42581--42591},
}
MedFG-VQA: Low-Frequency Memory and Graph Attention for Lightweight Medical VQA: Haowen Gu,

Gensheng Pei,

Zeren Sun,

Mingwu Ren,

Xiangbo Shu,

Yazhou Yao,

Fumin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Haowen and Pei, Gensheng and Sun, Zeren and Ren, Mingwu and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin},
  title     = {{MedFG-VQA}: Low-Frequency Memory and Graph Attention for Lightweight Medical {VQA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42755--42764},
}
FastRef: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection: Yufei Li,

Long Tian,

Yuyang Dai,

Wenchao Chen,

Liang Bao,

Xiyang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yufei and Tian, Long and Dai, Yuyang and Chen, Wenchao and Bao, Liang and Liu, Xiyang},
  title     = {{FastRef}: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43040--43049},
}
SaPaVe: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics: Mengzhen Liu,

Enshen Zhou,

Cheng Chi,

Yi Han,

Shanyu Rong,

Liming Chen,

Pengwei Wang,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mengzhen and Zhou, Enshen and Chi, Cheng and Han, Yi and Rong, Shanyu and Chen, Liming and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {{SaPaVe}: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37164--37174},
}
Understanding Task Transfer in Vision-Language Models: Bhuvan Sachdeva,

Karan Uppal,

Abhinav Java,

Vineeth N. Balasubramanian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sachdeva_2026_CVPR,
  author    = {Sachdeva, Bhuvan and Uppal, Karan and Java, Abhinav and Balasubramanian, Vineeth N.},
  title     = {Understanding Task Transfer in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28754--28763},
}
The Universal Normal Embedding: Chen Tasker,

Roy Betser,

Eyal Gofer,

Meir Yossef Levi,

Guy Gilboa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tasker_2026_CVPR,
  author    = {Tasker, Chen and Betser, Roy and Gofer, Eyal and Levi, Meir Yossef and Gilboa, Guy},
  title     = {The Universal Normal Embedding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32015--32025},
}
PFGNet: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning: Xinyong Cai,

Changbin Sun,

Yong Wang,

Hongyu Yang,

Yuankai Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Xinyong and Sun, Changbin and Wang, Yong and Yang, Hongyu and Wu, Yuankai},
  title     = {{PFGNet}: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38848--38858},
}
RoSAMDepth: Robust Self-supervised Depth Estimation Leveraging Segment Anything Model: Xuanang Gao,

Zhiwei Ning,

Gengming Zhang,

Jiaxi Cao,

Runze Yang,

Zhonglong Zheng,

Jie Yang,

Rong Xiao,

Wei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Xuanang and Ning, Zhiwei and Zhang, Gengming and Cao, Jiaxi and Yang, Runze and Zheng, Zhonglong and Yang, Jie and Xiao, Rong and Liu, Wei},
  title     = {{RoSAMDepth}: Robust Self-supervised Depth Estimation Leveraging {Segment Anything Model}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34138--34147},
}
IVAAN: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis: Jaehoon Jeong,

Yi Hu,

Soopil Kim,

Jongseong Jang,

Soonyoung Lee,

Sang Hyun Park; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Jaehoon and Hu, Yi and Kim, Soopil and Jang, Jongseong and Lee, Soonyoung and Park, Sang Hyun},
  title     = {{IVAAN}: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29305--29314},
}
FedAlign: Differentially Private Distribution Alignment for Non-IID Federated Learning: Peng Wu,

Jiapeng Zhang,

Yingjie Song,

Xiong Xiao,

Zhuo Tang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Peng and Zhang, Jiapeng and Song, Yingjie and Xiao, Xiong and Tang, Zhuo},
  title     = {{FedAlign}: Differentially Private Distribution Alignment for {Non-IID} Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31769--31778},
}
NeoVerse: Enhancing 4D World Model with in-the-wild Monocular Videos: Yuxue Yang,

Lue Fan,

Ziqi Shi,

Junran Peng,

Feng Wang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuxue and Fan, Lue and Shi, Ziqi and Peng, Junran and Wang, Feng and Zhang, Zhaoxiang},
  title     = {{NeoVerse}: Enhancing {4D} World Model with in-the-wild Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40340--40351},
}
Revisiting Model Stitching In the Foundation Model Era: Zheda Mai,

Ke Zhang,

Fu-En Wang,

Zixiao Ken Wang,

Albert Y. C. Chen,

Lu Xia,

Min Sun,

Wei-Lun Chao,

Cheng-Hao Kuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Zheda and Zhang, Ke and Wang, Fu-En and Wang, Zixiao Ken and Chen, Albert Y. C. and Xia, Lu and Sun, Min and Chao, Wei-Lun and Kuo, Cheng-Hao},
  title     = {Revisiting Model Stitching In the Foundation Model Era},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41342--41351},
}
Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization: Torsten Sattler,

Zuzana Kukelova; [pdf] [supp]
[bibtex]
@InProceedings{Sattler_2026_CVPR,
  author    = {Sattler, Torsten and Kukelova, Zuzana},
  title     = {Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29047--29059},
}
Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation: Guanting Guo,

Shenglong Hu,

Kaihua Zhang,

Guangcan Liu,

Min Xia; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Guanting and Hu, Shenglong and Zhang, Kaihua and Liu, Guangcan and Xia, Min},
  title     = {Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32173--32183},
}
SHAPE: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation: Linkuan Zhou,

Yinghao Xia,

Yufei Shen,

Xiangyu Li,

Wenjie Du,

Cong Cong,

Leyi Wei,

Ran Su,

Qiangguo Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Linkuan and Xia, Yinghao and Shen, Yufei and Li, Xiangyu and Du, Wenjie and Cong, Cong and Wei, Leyi and Su, Ran and Jin, Qiangguo},
    title     = {{SHAPE}: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30000--30010}
}
Region-Aware Instance Consistency Learning for Micro-Expression Recognition: Yaomin Cai,

C. L. Philip Chen,

Shiting Xu,

Haiqi Liu,

Tong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Yaomin and Chen, C. L. Philip and Xu, Shiting and Liu, Haiqi and Zhang, Tong},
    title     = {Region-Aware Instance Consistency Learning for Micro-Expression Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34428--34438}
}
Sampling-Aware Quantization for Diffusion Models: Qian Zeng,

Jie Song,

Yuanyu Wan,

Huiqiong Wang,

Mingli Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Qian and Song, Jie and Wan, Yuanyu and Wang, Huiqiong and Song, Mingli},
    title     = {Sampling-Aware Quantization for Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35831--35840}
}
AdvFM: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples: Runze Liu,

Zeyue Wang,

Fanghui Sun,

Rui Liu,

Yihan Yan,

Shen Wang,

Zhaoyang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Runze and Wang, Zeyue and Sun, Fanghui and Liu, Rui and Yan, Yihan and Wang, Shen and Zhang, Zhaoyang},
    title     = {{AdvFM}: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42290--42299}
}
MLLMSplat: A 2D MLLM-Powered Framework for 3D Gaussian Splatting Understanding, Generation, and Editing: Jingqiao Xiu,

Can Wang,

Dong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xiu_2026_CVPR,
    author    = {Xiu, Jingqiao and Wang, Can and Xu, Dong},
    title     = {{MLLMSplat}: A {2D} {MLLM-Powered} Framework for {3D} {Gaussian} Splatting Understanding, Generation, and Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33301--33311}
}
Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning: Yuqiao Zeng,

Xu Wang,

Tengfei Liang,

Yiqing Hao,

Yi Jin,

Hui Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Yuqiao and Wang, Xu and Liang, Tengfei and Hao, Yiqing and Jin, Yi and Yu, Hui},
    title     = {Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29391--29399}
}
D-Convexity: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation: Shengzhe Chen,

Hao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Shengzhe and Yan, Hao},
    title     = {{D-Convexity}: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34755--34764}
}
Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models: Huatian Zhang,

Zhendong Mao,

Lei Zhang,

Yongdong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Huatian and Mao, Zhendong and Zhang, Lei and Zhang, Yongdong},
    title     = {Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37831--37841}
}
AT-VLA: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models: Xiaoqi Li,

Muhe Cai,

Jiadong Xu,

Juan Zhu,

Hongwei Fan,

Yan Shen,

Guangrui Ren,

Hao Dong; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xiaoqi and Cai, Muhe and Xu, Jiadong and Zhu, Juan and Fan, Hongwei and Shen, Yan and Ren, Guangrui and Dong, Hao},
    title     = {{AT-VLA}: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28764--28774}
}
SAGE: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning: Jitesh Jain,

Allen AI blank,

Jialuo Li,

Zixian Ma,

Jieyu Zhang,

Chris Dongjoo Kim,

Sangho Lee,

Rohun Tripathi,

Tanmay Gupta,

Christopher Clark,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Jain_2026_CVPR,
    author        = {Jain, Jitesh and blank, Allen AI and Li, Jialuo and Ma, Zixian and Zhang, Jieyu and Kim, Chris Dongjoo and Lee, Sangho and Tripathi, Rohun and Gupta, Tanmay and Clark, Christopher and Shi, Humphrey},
    title         = {{SAGE}: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning},
    booktitle     = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month         = jun,
    year          = {2026},
    pages         = {41478--41488},
    internal-note = {NOTE(review): the author "blank, Allen AI" looks like a submission-form placeholder (an affiliation typed into the name fields), not a person. Verify against the paper's author list before citing.}
}
Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics: Ao Luo,

Xin Li,

Fan Yang,

Yuezun Li,

Zhaoquan Yuan,

Shan Zhao,

Bing Su,

Xiao Wu; [pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Ao and Li, Xin and Yang, Fan and Li, Yuezun and Yuan, Zhaoquan and Zhao, Shan and Su, Bing and Wu, Xiao},
    title     = {Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28669--28678}
}
Smoothing the Score Function to Enhance Generalization in Diffusion Models: Xinyu Zhou,

Jiawei Zhang,

Stephen J. Wright; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xinyu and Zhang, Jiawei and Wright, Stephen J.},
    title     = {Smoothing the Score Function to Enhance Generalization in Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43208--43217}
}
SegMo: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient VLM Inference: Haojuan Li,

Ruohan Tang,

Dongzhou Cheng,

Zongpu Zhang,

Jian Li,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Haojuan and Tang, Ruohan and Cheng, Dongzhou and Zhang, Zongpu and Li, Jian and Wang, Jiaqi},
    title     = {{SegMo}: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient {VLM} Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33598--33608}
}
TempoMaster: Efficient Long Video Generation via Next-Frame-Rate Prediction: Yukuo Ma,

Cong Liu,

Junke Wang,

Junqi Liu,

Haibin Huang,

Zuxuan Wu,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yukuo and Liu, Cong and Wang, Junke and Liu, Junqi and Huang, Haibin and Wu, Zuxuan and Zhang, Chi and Li, Xuelong},
    title     = {{TempoMaster}: Efficient Long Video Generation via Next-Frame-Rate Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30414--30424}
}
Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting: Jinhyeok Jang,

Jaehong Kim,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
    author    = {Jang, Jinhyeok and Kim, Jaehong and Kim, Jung Uk},
    title     = {Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39890--39900}
}
EvoID: Reinforced Evolution for Identity-Preserving Video Generation: Yiheng Zhang,

Zhaofan Qiu,

Zunxu Liu,

Yingwei Pan,

Ting Yao,

Tao Mei; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yiheng and Qiu, Zhaofan and Liu, Zunxu and Pan, Yingwei and Yao, Ting and Mei, Tao},
    title     = {{EvoID}: Reinforced Evolution for Identity-Preserving Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41870--41880}
}
Flow4DGS-SLAM: Optical Flow-Guided 4D Gaussian Splatting SLAM: Yunsong Wang,

Gim Hee Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yunsong and Lee, Gim Hee},
    title     = {{Flow4DGS-SLAM}: Optical Flow-Guided {4D} {Gaussian} Splatting {SLAM}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33364--33373}
}
Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics: Jiayuan Chen,

Ruoqi Liu,

Zishan Gu,

Ping Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Jiayuan and Liu, Ruoqi and Gu, Zishan and Zhang, Ping},
    title     = {Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41826--41835}
}
HalluGen: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration: Seunghoi Kim,

Henry F. J. Tregidgo,

Chen Jin,

Matteo Figini,

Daniel C. Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Seunghoi and Tregidgo, Henry F. J. and Jin, Chen and Figini, Matteo and Alexander, Daniel C.},
    title     = {{HalluGen}: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32550--32560}
}
NeuroSeg Meets DINOv3: Transferring 2D Self-Supervised Visual Priors to 3D Neuron Segmentation via DINOv3 Initialization: Yik San Cheng,

Runkai Zhao,

Weidong Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Yik San and Zhao, Runkai and Cai, Weidong},
    title     = {{NeuroSeg} Meets {DINOv3}: Transferring {2D} Self-Supervised Visual Priors to {3D} Neuron Segmentation via {DINOv3} Initialization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30053--30064}
}
TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis: Zhengpeng Feng,

Clement Atzberger,

Sadiq Jaffer,

Jovana Knezevic,

Silja Sormunen,

Robin Young,

Madeline C. Lisaius,

Markus Immitzer,

Toby Jackson,

James Ball,

David A. Coomes,

Anil Madhavapeddy,

Andrew Blake,

Srinivasan Keshav; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Zhengpeng and Atzberger, Clement and Jaffer, Sadiq and Knezevic, Jovana and Sormunen, Silja and Young, Robin and Lisaius, Madeline C. and Immitzer, Markus and Jackson, Toby and Ball, James and Coomes, David A. and Madhavapeddy, Anil and Blake, Andrew and Keshav, Srinivasan},
    title     = {{TESSERA}: Temporal Embeddings of Surface Spectra for {Earth} Representation and Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34818--34831}
}
Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation: Yongrui Ma,

Shijie Zhao,

Mingde Yao,

Junlin Li,

Li Zhang,

Xiaohong Liu,

Qi Dou,

Jinwei Gu,

Tianfan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yongrui and Zhao, Shijie and Yao, Mingde and Li, Junlin and Zhang, Li and Liu, Xiaohong and Dou, Qi and Gu, Jinwei and Xue, Tianfan},
    title     = {Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35779--35788}
}
MOS: Mitigating Optical-SAR Modality Gap for Cross-Modal Ship Re-Identification: Yujian Zhao,

Hankun Liu,

Guanglin Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Yujian and Liu, Hankun and Niu, Guanglin},
    title     = {{MOS}: Mitigating {Optical-SAR} Modality Gap for Cross-Modal Ship Re-Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30335--30345}
}
Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving: Shuhan Tan,

Kashyap Chitta,

Yuxiao Chen,

Ran Tian,

Yurong You,

Yan Wang,

Wenjie Luo,

Yulong Cao,

Philipp Krähenbühl,

Marco Pavone,

Boris Ivanovic; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Shuhan and Chitta, Kashyap and Chen, Yuxiao and Tian, Ran and You, Yurong and Wang, Yan and Luo, Wenjie and Cao, Yulong and Kr{\"a}henb{\"u}hl, Philipp and Pavone, Marco and Ivanovic, Boris},
    title     = {Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39724--39733}
}
BDNet: Bio-Inspired Dual-Backbone Small Object Detection Network: Wenchao Guan,

Chuan Lin,

Sihan Huang,

Xiongzhen Wang,

Xintao Pang; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2026_CVPR,
    author    = {Guan, Wenchao and Lin, Chuan and Huang, Sihan and Wang, Xiongzhen and Pang, Xintao},
    title     = {{BDNet}: Bio-Inspired Dual-Backbone Small Object Detection Network},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32724--32734}
}
RoboTAG: End-to-end Robot Pose Estimation via Topological Alignment Graph: Yifan Liu,

Fangneng Zhan,

Wanhua Li,

Haowen Sun,

Katerina Fragkiadaki,

Hanspeter Pfister; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yifan and Zhan, Fangneng and Li, Wanhua and Sun, Haowen and Fragkiadaki, Katerina and Pfister, Hanspeter},
    title     = {{RoboTAG}: End-to-end Robot Pose Estimation via Topological Alignment Graph},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35146--35155}
}
SegQuant: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models: Jiaji Zhang,

Ruichao Sun,

Hailiang Zhao,

Jiaju Wu,

Peng Chen,

Hao Li,

Yuying Liu,

Kingsum Chow,

Gang Xiong,

Shuiguang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiaji and Sun, Ruichao and Zhao, Hailiang and Wu, Jiaju and Chen, Peng and Li, Hao and Liu, Yuying and Chow, Kingsum and Xiong, Gang and Deng, Shuiguang},
    title     = {{SegQuant}: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43717--43726}
}
F^2HDR: Two-Stage HDR Video Reconstruction via Flow Adapter and Physical Motion Modeling: Huanjing Yue,

Dawei Li,

Shaoxiong Tu,

Jingyu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2026_CVPR,
    author    = {Yue, Huanjing and Li, Dawei and Tu, Shaoxiong and Yang, Jingyu},
    title     = {{F$^2$HDR}: Two-Stage {HDR} Video Reconstruction via Flow Adapter and Physical Motion Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33985--33994}
}
Hi-Lo Prune: Look at What You'll Lose before Pruning with Hierarchical Token Selection: Zixun Sun,

Yubo Dong,

Hehe Fan,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Zixun and Dong, Yubo and Fan, Hehe and Yang, Yi},
    title     = {{Hi-Lo Prune}: Look at What You'll Lose before Pruning with Hierarchical Token Selection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31941--31951}
}
VideoWorld 2: Learning Transferable Knowledge from Real-world Videos: Zhongwei Ren,

Yunchao Wei,

Xiao Yu,

Guixun Luo,

Yao Zhao,

Bingyi Kang,

Jiashi Feng,

Xiaojie Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
    author    = {Ren, Zhongwei and Wei, Yunchao and Yu, Xiao and Luo, Guixun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie},
    title     = {{VideoWorld} 2: Learning Transferable Knowledge from Real-world Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40569--40580}
}
RDFace: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation: Ganlin Feng,

Yuxi Long,

Hafsa Ali,

Erin Lou,

Fahad Butt,

Qian Liu,

Yang Wang,

Pingzhao Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Ganlin and Long, Yuxi and Ali, Hafsa and Lou, Erin and Butt, Fahad and Liu, Qian and Wang, Yang and Hu, Pingzhao},
    title     = {{RDFace}: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42976--42986}
}
TherA: Thermal-Aware Visual-Language Prompting for Controllable RGB-to-Thermal Infrared Translation: Dong-Guw Lee,

Tai Hyoung Rhee,

Hyunsoo Jang,

Young-Sik Shin,

Ukcheol Shin,

Ayoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Dong-Guw and Rhee, Tai Hyoung and Jang, Hyunsoo and Shin, Young-Sik and Shin, Ukcheol and Kim, Ayoung},
    title     = {{TherA}: Thermal-Aware Visual-Language Prompting for Controllable {RGB-to-Thermal} Infrared Translation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36803--36813}
}
Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation: Yang Xu,

Yiwei Bao,

Feng Lu; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Yang and Bao, Yiwei and Lu, Feng},
    title     = {Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42858--42868}
}
PosterIQ: A Design Perspective Benchmark for Poster Understanding and Generation: Yuheng Feng,

Wen Zhang,

Haodong Duan,

Xingxing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Yuheng and Zhang, Wen and Duan, Haodong and Zou, Xingxing},
    title     = {{PosterIQ}: A Design Perspective Benchmark for Poster Understanding and Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29295--29304}
}
Rethinking 2D-3D Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment: Zhixin Cheng,

Bohao Liao,

Jiacheng Deng,

Xiaotian Yin,

Xinjun Li,

Yujia Chen,

Baoqun Yin,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Zhixin and Liao, Bohao and Deng, Jiacheng and Yin, Xiaotian and Li, Xinjun and Chen, Yujia and Yin, Baoqun and Zhang, Tianzhu},
    title     = {Rethinking {2D-3D} Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39052--39063}
}
Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models: Yuedong Yang,

Xiwen Wei,

Mustafa Munir,

Radu Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuedong and Wei, Xiwen and Munir, Mustafa and Marculescu, Radu},
    title     = {Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33436--33445}
}
FB-CLIP: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement: Ming Hu,

Yongsheng Huo,

Mingyu Dou,

Jianfu Yin,

Peng Zhao,

Yao Wang,

Cong Hu,

Bingliang Hu,

Quan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Ming and Huo, Yongsheng and Dou, Mingyu and Yin, Jianfu and Zhao, Peng and Wang, Yao and Hu, Cong and Hu, Bingliang and Wang, Quan},
    title     = {{FB-CLIP}: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35659--35669}
}
DINO Eats CLIP: Adapting Beyond Knowns for Open-set 3D Object Retrieval: Xinwei He,

Yansong Zheng,

Qianru Han,

Zhichuan Wang,

Yuxuan Cai,

Yang Zhou,

Jingbo Xia,

Yulong Wang,

Jinhai Xiang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Xinwei and Zheng, Yansong and Han, Qianru and Wang, Zhichuan and Cai, Yuxuan and Zhou, Yang and Xia, Jingbo and Wang, Yulong and Xiang, Jinhai and Bai, Xiang},
    title     = {{DINO} Eats {CLIP}: Adapting Beyond Knowns for Open-set {3D} Object Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34704--34713}
}
MedLIME: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations: Raghav Magazine,

Xingjian Li,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Magazine_2026_CVPR,
    author    = {Magazine, Raghav and Li, Xingjian and Xu, Min},
    title     = {{MedLIME}: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38926--38935}
}
Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future sMRI Representations for Early MCI Conversion Prediction: Zhihao Tang,

Chaozhuo Li,

Litian Zhang,

Xi Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Zhihao and Li, Chaozhuo and Zhang, Litian and Zhang, Xi},
    title     = {Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future {sMRI} Representations for Early {MCI} Conversion Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42691--42700}
}
MoLingo: Motion-Language Alignment for Text-to-Human Motion Generation: Yannan He,

Garvita Tiwari,

Xiaohan Zhang,

Pankaj Bora,

Tolga Birdal,

Jan Eric Lenssen,

Gerard Pons-Moll; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Yannan and Tiwari, Garvita and Zhang, Xiaohan and Bora, Pankaj and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard},
    title     = {{MoLingo}: Motion-Language Alignment for Text-to-Human Motion Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38387--38398}
}
VL-Eraser: Vacuum Distillation for Machine Unlearning in Vision-Language Models: Yili Wang,

Lu Dai,

Tairan Huang,

Yijie Xu,

Hui Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yili and Dai, Lu and Huang, Tairan and Xu, Yijie and Xiong, Hui},
    title     = {{VL-Eraser}: Vacuum Distillation for Machine Unlearning in Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31504--31513}
}
CoordSpeaker: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation: Fengyi Fang,

Sicheng Yang,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
    author    = {Fang, Fengyi and Yang, Sicheng and Yang, Wenming},
    title     = {{CoordSpeaker}: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30761--30771}
}
Neu-PiG: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences: Julian Kaltheuner,

Hannah Dröge,

Markus Plack,

Patrick Stotko,

Reinhard Klein; [pdf] [supp]
[bibtex]
@InProceedings{Kaltheuner_2026_CVPR,
    author    = {Kaltheuner, Julian and Dr{\"o}ge, Hannah and Plack, Markus and Stotko, Patrick and Klein, Reinhard},
    title     = {{Neu-PiG}: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36539--36549}
}
VecAttention: Vector-wise Sparse Attention for Accelerating Long Context Inference: Anmin Liu,

Ruixuan Yang,

Huiqiang Jiang,

Bin Lin,

Minmin Sun,

Yong Li,

Chen Zhang,

Tao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Anmin and Yang, Ruixuan and Jiang, Huiqiang and Lin, Bin and Sun, Minmin and Li, Yong and Zhang, Chen and Xie, Tao},
    title     = {{VecAttention}: Vector-wise Sparse Attention for Accelerating Long Context Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41299--41310}
}
Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging: Hesong Li,

Ziqi Wu,

Ruiwen Shao,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Fu, Ying},
    title     = {Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34050--34060}
}
GDRO: Group-level Reward Post-training Suitable for Diffusion Models: Yiyang Wang,

Xi Chen,

Xiaogang Xu,

Yu Liu,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Liu, Yu and Zhao, Hengshuang},
    title     = {{GDRO}: Group-level Reward Post-training Suitable for Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43505--43513}
}
mmWaveFlow: Unified Enhancement and Generation of mmWave Human Point Clouds: Chang Su,

Beihong Jin,

Qiwen Shi,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Chang and Jin, Beihong and Shi, Qiwen and Wang, Zhi},
    title     = {{mmWaveFlow}: Unified Enhancement and Generation of {mmWave} Human Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31366--31376}
}
Occlusion-Aware SORT: Observing Occlusion for Robust Multi-Object Tracking: Chunjiang Li,

Jianbo Ma,

Li Shen,

Yanru Chen,

Liangyin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chunjiang and Ma, Jianbo and Shen, Li and Chen, Yanru and Chen, Liangyin},
    title     = {Occlusion-Aware {SORT}: Observing Occlusion for Robust Multi-Object Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42560--42570}
}
TIACam: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking: Abdullah Tanvir,

Agnibh Dasgupta,

Xin Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanvir_2026_CVPR,
    author    = {Tanvir, Abdullah and Dasgupta, Agnibh and Zhong, Xin},
    title     = {{TIACam}: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43030--43039}
}
AnthroTAP: Learning Point Tracking with Real-World Motion: Inès Hyeonsu Kim,

Seokju Cho,

Jahyeok Koo,

Junghyun Park,

Jiahui Huang,

Honglak Lee,

Joon-Young Lee,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, In{\`e}s Hyeonsu and Cho, Seokju and Koo, Jahyeok and Park, Junghyun and Huang, Jiahui and Lee, Honglak and Lee, Joon-Young and Kim, Seungryong},
    title     = {{AnthroTAP}: Learning Point Tracking with Real-World Motion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42516--42526}
}
CodePercept: Code-Grounded Visual STEM Perception for MLLMs: Tongkun Guan,

Zhibo Yang,

Jianqiang Wan,

Mingkun Yang,

Zhentao Guo,

Zijian Hu,

Ruilin Luo,

Ruizhe Chen,

Songtao Jiang,

Peng Wang,

Wei Shen,

Junyang Lin,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
    author    = {Guan, Tongkun and Yang, Zhibo and Wan, Jianqiang and Yang, Mingkun and Guo, Zhentao and Hu, Zijian and Luo, Ruilin and Chen, Ruizhe and Jiang, Songtao and Wang, Peng and Shen, Wei and Lin, Junyang and Yang, Xiaokang},
    title     = {{CodePercept}: Code-Grounded Visual {STEM} Perception for {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33542--33552}
}
Bridging the Perception Gap in Image Super-Resolution Evaluation: Shaolin Su,

Josep M. Rocafort,

Danna Xue,

David Serrano-Lozano,

Lei Sun,

Javier Vazquez-Corral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Shaolin and Rocafort, Josep M. and Xue, Danna and Serrano-Lozano, David and Sun, Lei and Vazquez-Corral, Javier},
    title     = {Bridging the Perception Gap in Image Super-Resolution Evaluation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30532--30542}
}
Collaborative Multi-Mode Pruning for Vision-Language Models: Zimeng Wu,

Yunhong Wang,

Donghao Wang,

Jiaxin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Zimeng and Wang, Yunhong and Wang, Donghao and Chen, Jiaxin},
    title     = {Collaborative Multi-Mode Pruning for Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39561--39571}
}
MIBURI: Towards Expressive Interactive Gesture Synthesis: M. Hamza Mughal,

Rishabh Dabral,

Vera Demberg,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mughal_2026_CVPR, author = {Mughal, M. Hamza and Dabral, Rishabh and Demberg, Vera and Theobalt, Christian}, title = {{MIBURI}: Towards Expressive Interactive Gesture Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40031--40041} }
EEGiT: Teaching Vision Transformers to Understand the EEG signal: Jiahao Zhou,

Chenghao Xu,

Wei Wang,

Erkun Yang,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Jiahao and Xu, Chenghao and Wang, Wei and Yang, Erkun and Deng, Cheng}, title = {{EEGiT}: Teaching Vision Transformers to Understand the {EEG} signal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40438--40447} }
Learning Surgical Robotic Manipulation with 3D Spatial Priors: Yu Sheng,

Lidian Wang,

Xiaomeng Chu,

Jiajun Deng,

Min Cheng,

Yanyong Zhang,

Bei Hua,

Houqiang Li,

Jianmin Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR, author = {Sheng, Yu and Wang, Lidian and Chu, Xiaomeng and Deng, Jiajun and Cheng, Min and Zhang, Yanyong and Hua, Bei and Li, Houqiang and Ji, Jianmin}, title = {Learning Surgical Robotic Manipulation with {3D} Spatial Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42441--42451} }
Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of LVLM Hallucinations: Tuan Dung Nguyen,

Minh Khoi Ho,

Qi Chen,

Yutong Xie,

Cam-Tu Nguyen,

Minh Khoi Nguyen,

Dang Huy Pham Nguyen,

Anton van den Hengel,

Johan Verjans,

Phi Le Nguyen,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and Xie, Yutong and Nguyen, Cam-Tu and Nguyen, Minh Khoi and Nguyen, Dang Huy Pham and van den Hengel, Anton and Verjans, Johan and Le Nguyen, Phi and Phan, Vu Minh Hieu}, title = {Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of {LVLM} Hallucinations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40235--40244} }
Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation: Yunhong Lu,

Yanhong Zeng,

Haobo Li,

Hao Ouyang,

Qiuyu Wang,

Ka Leong Cheng,

Jiapeng Zhu,

Hengyuan Cao,

Zhipeng Zhang,

Xing Zhu,

Yujun Shen,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Yunhong and Zeng, Yanhong and Li, Haobo and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Zhu, Jiapeng and Cao, Hengyuan and Zhang, Zhipeng and Zhu, Xing and Shen, Yujun and Zhang, Min}, title = {Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34385--34397} }
Graph-to-Frame RAG: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning: Songyuan Yang,

Weijiang Yu,

Ziyu Liu,

Guijian Tang,

Wenjing Yang,

Huibin Tan,

Nong Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Yu, Weijiang and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong}, title = {Graph-to-Frame {RAG}: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33805--33815} }
VCP-Attack: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models: Jiawei Zhao,

Minjie Du,

Zihan Qin,

Zhuoran Wang,

Lizhe Xie,

Yining Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jiawei and Du, Minjie and Qin, Zihan and Wang, Zhuoran and Xie, Lizhe and Hu, Yining}, title = {{VCP-Attack}: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30110--30119} }
Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning: Xiaojun Deng,

Tianchi Liao,

Zhiyuan Liu,

Chuan Chen,

Zibin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR, author = {Deng, Xiaojun and Liao, Tianchi and Liu, Zhiyuan and Chen, Chuan and Zheng, Zibin}, title = {Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31779--31789} }
MemFlow: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping: Jianming Lv,

Chengjun Wang,

Depin Liang,

Qianli Ma,

Wei Chen,

Xueqi Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR, author = {Lv, Jianming and Wang, Chengjun and Liang, Depin and Ma, Qianli and Chen, Wei and Cheng, Xueqi}, title = {{MemFlow}: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36968--36977} }
See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse Earth Observation Conditions: Jiayi Wang,

Zhihong Tan,

Hongchen Wei,

Daiqin Yang,

Zhenzhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Jiayi and Tan, Zhihong and Wei, Hongchen and Yang, Daiqin and Chen, Zhenzhong}, title = {See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse {Earth} Observation Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42191--42201} }
WildPose: A Unified Framework for Robust Pose Estimation in the Wild: Jianhao Zheng,

Liyuan Zhu,

Zihan Zhu,

Iro Armeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jianhao and Zhu, Liyuan and Zhu, Zihan and Armeni, Iro}, title = {{WildPose}: A Unified Framework for Robust Pose Estimation in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28903--28913} }
Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation: Ziyue Lin,

Jiahe Hou,

Hongyu Xia,

Xinrui Xie,

Feifei Wang,

Yuyin Zhou,

Wei Wang,

Jiawei Liu,

Liangqiong Qu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Ziyue and Hou, Jiahe and Xia, Hongyu and Xie, Xinrui and Wang, Feifei and Zhou, Yuyin and Wang, Wei and Liu, Jiawei and Qu, Liangqiong}, title = {Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35967--35977} }
2ndMatch: Finetuning Pruned Diffusion Models via Second-Order Jacobian Matching: Caleb Zheng,

Eli Shlizerman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Caleb and Shlizerman, Eli}, title = {{2ndMatch}: Finetuning Pruned Diffusion Models via Second-Order {Jacobian} Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43385--43395} }
CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning: Jiange Yang,

Yansong Shi,

Haoyi Zhu,

Mingyu Liu,

Kaijing Ma,

Yating Wang,

Gangshan Wu,

Tong He,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Jiange and Shi, Yansong and Zhu, Haoyi and Liu, Mingyu and Ma, Kaijing and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin}, title = {{CoMo}: Learning Continuous Latent Motion from {Internet} Videos for Scalable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42352--42363} }
Latent Diffusion Inversion Requires Understanding the Latent Space: Mingxing Rao,

Bowen Qu,

Daniel Moyer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2026_CVPR, author = {Rao, Mingxing and Qu, Bowen and Moyer, Daniel}, title = {Latent Diffusion Inversion Requires Understanding the Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34971--34980} }
Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding: Junhan Chen,

Zilu Zhou,

Yujun Tong,

Dongliang Chang,

Yitao Luo,

Zhanyu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Junhan and Zhou, Zilu and Tong, Yujun and Chang, Dongliang and Luo, Yitao and Ma, Zhanyu}, title = {Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41446--41455} }
DSERT-RoLL: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-RGB-Thermal Cameras, 4D Radar, and Dual-LiDAR: Hoonhee Cho,

Jae-Young Kang,

Yuhwan Jeong,

Yunseo Yang,

Wonyoung Lee,

Youngho Kim,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2026_CVPR, author = {Cho, Hoonhee and Kang, Jae-Young and Jeong, Yuhwan and Yang, Yunseo and Lee, Wonyoung and Kim, Youngho and Yoon, Kuk-Jin}, title = {{DSERT-RoLL}: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo {Event-RGB-Thermal} Cameras, {4D} Radar, and {Dual-LiDAR}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33022--33035} }
Seeing Motion Through Polarity for Event-based Action Recognition: Meiqi Cao,

Jiachao Zhang,

Xin Jiang,

Rui Yan,

Yazhou Yao,

Zechao Li,

Xiangbo Shu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR, author = {Cao, Meiqi and Zhang, Jiachao and Jiang, Xin and Yan, Rui and Yao, Yazhou and Li, Zechao and Shu, Xiangbo}, title = {Seeing Motion Through Polarity for Event-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37075--37085} }
Ultra-Fast Neural Video Compression: Jiahao Li,

Wenxuan Xie,

Zhaoyang Jia,

Bin Li,

Zongyu Guo,

Xiaoyi Zhang,

Yan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Xie, Wenxuan and Jia, Zhaoyang and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Lu, Yan}, title = {Ultra-Fast Neural Video Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41311--41321} }
CRAFT: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think: Zening Sun,

Zhengpeng Xie,

Lichen Bai,

Shitong Shao,

Shuo Yang,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Zening and Xie, Zhengpeng and Bai, Lichen and Shao, Shitong and Yang, Shuo and Xie, Zeke}, title = {{CRAFT}: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35841--35850} }
BEA-GS: BEyond RAdiance Supervision in 3DGS for Precise Object Extraction: Alessio Mazzucchelli,

Maria Naranjo-Almeida,

Jorge Bustos-Sanchez,

Mariella Dimiccoli,

Francesc Moreno-Noguer,

Jordi Sanchez-Riera,

Adrian Penate-Sanchez; [pdf] [supp]
[bibtex]
@InProceedings{Mazzucchelli_2026_CVPR, author = {Mazzucchelli, Alessio and Naranjo-Almeida, Maria and Bustos-Sanchez, Jorge and Dimiccoli, Mariella and Moreno-Noguer, Francesc and Sanchez-Riera, Jordi and Penate-Sanchez, Adrian}, title = {{BEA-GS}: {BEyond} {RAdiance} Supervision in {3DGS} for Precise Object Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41054--41064} }
FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition: Jie Zhu,

Xiao Guo,

Yiyang Su,

Anil Jain,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jie and Guo, Xiao and Su, Yiyang and Jain, Anil and Liu, Xiaoming}, title = {{FusionAgent}: A Multimodal Agent with Dynamic Model Selection for Human Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32756--32766} }
Learning Forgery-Aware Lip Representations Without Forgery Priors: Bofan Chen,

Hongyu Zhu,

Yi He,

Sichu Liang,

Shi-Lin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Bofan and Zhu, Hongyu and He, Yi and Liang, Sichu and Wang, Shi-Lin}, title = {Learning Forgery-Aware Lip Representations Without Forgery Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42911--42921} }
How to Take a Memorable Picture? Empowering Users with Actionable Feedback: Francesco Laiti,

Davide Talon,

Jacopo Staiano,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Laiti_2026_CVPR, author = {Laiti, Francesco and Talon, Davide and Staiano, Jacopo and Ricci, Elisa}, title = {How to Take a Memorable Picture? Empowering Users with Actionable Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29738--29749} }
ZINA: Multimodal Fine-grained Hallucination Detection and Editing: Yuiga Wada,

Kazuki Matsuda,

Komei Sugiura,

Graham Neubig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wada_2026_CVPR, author = {Wada, Yuiga and Matsuda, Kazuki and Sugiura, Komei and Neubig, Graham}, title = {{ZINA}: Multimodal Fine-grained Hallucination Detection and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32528--32538} }
Exploring Visual Pretraining for Learning Language Intelligence: Zhonghan Zhao,

Yiming Zhang,

Wenwei Zhang,

Haiteng Zhao,

Xingguang Wei,

Zhangwei Gao,

Kuikun Liu,

Yuzhe Gu,

Size Wu,

Haian Huang,

Jianfei Gao,

Haijun Lv,

Demin Song,

Yunhua Zhou,

Qipeng Guo,

Gaoang Wang,

Kai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zhonghan and Zhang, Yiming and Zhang, Wenwei and Zhao, Haiteng and Wei, Xingguang and Gao, Zhangwei and Liu, Kuikun and Gu, Yuzhe and Wu, Size and Huang, Haian and Gao, Jianfei and Lv, Haijun and Song, Demin and Zhou, Yunhua and Guo, Qipeng and Wang, Gaoang and Chen, Kai}, title = {Exploring Visual Pretraining for Learning Language Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31493--31503} }
ReBaPL: Repulsive Bayesian Prompt Learning: Yassir Bendou,

Omar Ezzahir,

Eduardo Montesuma,

Gabriel Mahuas,

Victoria Shevchenko,

Mike Gartrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bendou_2026_CVPR, author = {Bendou, Yassir and Ezzahir, Omar and Montesuma, Eduardo and Mahuas, Gabriel and Shevchenko, Victoria and Gartrell, Mike}, title = {{ReBaPL}: Repulsive {Bayesian} Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39962--39971} }
Model Merging in the Essential Subspace: Longhua Li,

Lei Qi,

Qi Tian,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Longhua and Qi, Lei and Tian, Qi and Geng, Xin}, title = {Model Merging in the Essential Subspace}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31145--31154} }
RecTok: Reconstruction Distillation along Rectified Flow: Qingyu Shi,

Size Wu,

Jinbin Bai,

Kaidong Yu,

Yujing Wang,

Yunhai Tong,

Xiangtai Li,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Qingyu and Wu, Size and Bai, Jinbin and Yu, Kaidong and Wang, Yujing and Tong, Yunhai and Li, Xiangtai and Li, Xuelong}, title = {{RecTok}: Reconstruction Distillation along Rectified Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40685--40695} }
Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance: Shigeng Xie,

Hongming Xu,

Guiyang Jiang,

Tuomo Rossi,

Tommi Kärkkäinen,

Fengyu Cong; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Shigeng and Xu, Hongming and Jiang, Guiyang and Rossi, Tuomo and K{\"a}rkk{\"a}inen, Tommi and Cong, Fengyu}, title = {Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35311--35320} }
Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation: Muquan Li,

Hang Gou,

Yingyi Ma,

Rongzheng Wang,

Ke Qin,

Tao He; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Muquan and Gou, Hang and Ma, Yingyi and Wang, Rongzheng and Qin, Ke and He, Tao}, title = {Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33963--33972} }
CASPA: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models: Abhiroop Chatterjee,

Susmita Ghosh,

Ashish Ghosh,

Emmett Ientilucci; [pdf] [supp]
[bibtex]
@InProceedings{Chatterjee_2026_CVPR, author = {Chatterjee, Abhiroop and Ghosh, Susmita and Ghosh, Ashish and Ientilucci, Emmett}, title = {{CASPA}: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31566--31576} }
4D-RGPT: Toward Region-level 4D Understanding via Perceptual Distillation: Chiao-An Yang,

Ryo Hachiuma,

Sifei Liu,

Subhashree Radhakrishnan,

Raymond A. Yeh,

Yu-Chiang Frank Wang,

Min-Hung Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Chiao-An and Hachiuma, Ryo and Liu, Sifei and Radhakrishnan, Subhashree and Yeh, Raymond A. and Wang, Yu-Chiang Frank and Chen, Min-Hung}, title = {{4D-RGPT}: Toward Region-level {4D} Understanding via Perceptual Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31042--31053} }
EasyOmnimatte: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition: Yihan Hu,

Xuelin Chen,

Xiaodong Cun; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Yihan and Chen, Xuelin and Cun, Xiaodong}, title = {{EasyOmnimatte}: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43341--43351} }
Interactive Episodic Memory with User Feedback: Nikesh Subedi,

Loris Bazzani,

Ziad Al-Halah; [pdf] [supp]
[bibtex]
@InProceedings{Subedi_2026_CVPR, author = {Subedi, Nikesh and Bazzani, Loris and Al-Halah, Ziad}, title = {Interactive Episodic Memory with User Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38826--38835} }
PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction: Zhi-Yi Lin,

Thomas Markhorst,

Jouh Yeong Chew,

Xucong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Zhi-Yi and Markhorst, Thomas and Chew, Jouh Yeong and Zhang, Xucong}, title = {{PolySLGen}: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29379--29390} }
Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport: Yang Xiao,

Weiming Liu,

Jun Dan,

Tengyue Xu,

Fan Wang,

Hua Yu,

Junhao Dong,

Jiao Liu,

Shunjie Dong,

Lianyong Qi; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Yang and Liu, Weiming and Dan, Jun and Xu, Tengyue and Wang, Fan and Yu, Hua and Dong, Junhao and Liu, Jiao and Dong, Shunjie and Qi, Lianyong}, title = {Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32026--32036} }
TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment: Chunxia Qin,

Chenyu Liu,

Pengcheng Xia,

Jun Du,

Baocai Yin,

Bing Yin,

Cong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR, author = {Qin, Chunxia and Liu, Chenyu and Xia, Pengcheng and Du, Jun and Yin, Baocai and Yin, Bing and Liu, Cong}, title = {{TDATR}: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36838--36849} }
TopoCL: Topological Contrastive Learning for Medical Imaging: Guangyu Meng,

Pengfei Gu,

Peixian Liang,

John P. Lalor,

Erin Wolf Chambers,

Danny Z. Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR, author = {Meng, Guangyu and Gu, Pengfei and Liang, Peixian and Lalor, John P. and Chambers, Erin Wolf and Chen, Danny Z.}, title = {{TopoCL}: Topological Contrastive Learning for Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42681--42690} }
MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization: Chengyue Huang,

Mellon M. Zhang,

Robert Azarcon,

Glen Chou,

Zsolt Kira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Chengyue and Zhang, Mellon M. and Azarcon, Robert and Chou, Glen and Kira, Zsolt}, title = {{MAPS}: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32451--32462} }
TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition: Junyuan Zhang,

Bin Wang,

Qintong Zhang,

Fan Wu,

Zichen Wen,

Jialin Lu,

Junjie Shan,

Ziqi Zhao,

Shuya Yang,

Ziling Wang,

Ziyang Miao,

Huaping Zhong,

Yuhang Zang,

Xiaoyi Dong,

Ka-Ho Chow,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junyuan and Wang, Bin and Zhang, Qintong and Wu, Fan and Wen, Zichen and Lu, Jialin and Shan, Junjie and Zhao, Ziqi and Yang, Shuya and Wang, Ziling and Miao, Ziyang and Zhong, Huaping and Zang, Yuhang and Dong, Xiaoyi and Chow, Ka-Ho and He, Conghui}, title = {{TRivia}: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33196--33206} }
AudioStory: Generating Long-Form Narrative Audio with Large Language Models: Yuxin Guo,

Teng Wang,

Yuying Ge,

Shijie Ma,

Yixiao Ge,

Wei Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Yuxin and Wang, Teng and Ge, Yuying and Ma, Shijie and Ge, Yixiao and Zou, Wei}, title = {{AudioStory}: Generating Long-Form Narrative Audio with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37735--37744} }
DialogueVPR: Towards Conversational Visual Place Recognition: Yukun Song,

Changwei Wang,

Xingtian Pei,

Shibiao Xu,

Wenhao Xu,

Shunpeng Chen,

Yu Zhang,

Ke Zhang,

Rongtao Xu,

Xuxiang Feng,

Pengyang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Yukun and Wang, Changwei and Pei, Xingtian and Xu, Shibiao and Xu, Wenhao and Chen, Shunpeng and Zhang, Yu and Zhang, Ke and Xu, Rongtao and Feng, Xuxiang and Wang, Pengyang}, title = {{DialogueVPR}: Towards Conversational Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41100--41110} }
RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation: Shijie Zhou,

Bin Zhu,

Jiarui Yang,

Xiangyu Zhao,

Jingjing Chen,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shijie and Zhu, Bin and Yang, Jiarui and Zhao, Xiangyu and Chen, Jingjing and Jiang, Yu-Gang}, title = {{RC-NF}: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43050--43060} }
ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction: Jie Liang,

Jiahao Wu,

Chao Wang,

Jiayu Yang,

Xiaoyun Zheng,

Kaiqiang Xiong,

Zhanke Wang,

Jinbo Yan,

Feng Gao,

Ronggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Jie and Wu, Jiahao and Wang, Chao and Yang, Jiayu and Zheng, Xiaoyun and Xiong, Kaiqiang and Wang, Zhanke and Yan, Jinbo and Gao, Feng and Wang, Ronggang}, title = {{ClipGStream}: Clip-Stream {Gaussian} Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41022--41032} }
Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition: Shuo Zhang,

Chenqi Li,

Tingting Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuo and Li, Chenqi and Zhu, Tingting}, title = {Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36947--36956} }
Multi-view Pyramid Transformer: Look Coarser to See Broader: Gyeongjin Kang,

Seungkwon Yang,

Seungtae Nam,

Younggeun Lee,

Jungwoo Kim,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Yang, Seungkwon and Nam, Seungtae and Lee, Younggeun and Kim, Jungwoo and Park, Eunbyung}, title = {Multi-view Pyramid Transformer: Look Coarser to See Broader}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37380--37390} }
EasyV2V: A High-quality Instruction-based Video Editing Framework: Jinjie Mai,

Chaoyang Wang,

Gordon Guocheng Qian,

Willi Menapace,

Sergey Tulyakov,

Bernard Ghanem,

Peter Wonka,

Ashkan Mirzaei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR, author = {Mai, Jinjie and Wang, Chaoyang and Qian, Gordon Guocheng and Menapace, Willi and Tulyakov, Sergey and Ghanem, Bernard and Wonka, Peter and Mirzaei, Ashkan}, title = {{EasyV2V}: A High-quality Instruction-based Video Editing Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30435--30445} }
Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles: Jiawei Liu,

Xun Gong,

Fen Fang,

Muli Yang,

Bohao Qu,

Yunfeng Hu,

Hong Chen,

Xulei Yang,

Qing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Jiawei and Gong, Xun and Fang, Fen and Yang, Muli and Qu, Bohao and Hu, Yunfeng and Chen, Hong and Yang, Xulei and Guo, Qing}, title = {Open-Ended Instruction Realization with {LLM-Enabled} Multi-Planner Scheduling in Autonomous Vehicles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32070--32081} }
OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis: Yuxuan Fan,

Jing Hao,

Hong Chen,

Jiahao Bao,

Yihua Shao,

Yuci Liang,

Kuo Feng Hung,

Hao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Yuxuan and Hao, Jing and Chen, Hong and Bao, Jiahao and Shao, Yihua and Liang, Yuci and Hung, Kuo Feng and Tang, Hao}, title = {{OralGPT-Plus}: Learning to Use Visual Tools via Reinforcement Learning for Panoramic {X-ray} Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35373--35383} }
LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers: Hyunha Hwang,

Xuan Truong Nguyen,

Hyuk-Jae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Hwang_2026_CVPR, author = {Hwang, Hyunha and Nguyen, Xuan Truong and Lee, Hyuk-Jae}, title = {{LS-ViT}: Least-Squares {Hessian} Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33588--33597} }
CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation: Pablo Messina,

Andrés Villa,

Juan Leon Alcazar,

Karen Sanchez,

Carlos Hinojosa,

Denis Parra,

Alvaro Soto,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Messina_2026_CVPR, author = {Messina, Pablo and Villa, Andr{\'e}s and Alcazar, Juan Leon and Sanchez, Karen and Hinojosa, Carlos and Parra, Denis and Soto, Alvaro and Ghanem, Bernard}, title = {{CURE}: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36279--36289} }
Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation: He Zhu,

Ren Togo,

Takahiro Ogawa,

Kenji Hirata,

Minghui Tang,

Takaaki Yoshimura,

Hiroyuki Sugimori,

Noriko Nishioka,

Yukie Shimizu,

Kohsuke Kudo,

Miki Haseyama; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, He and Togo, Ren and Ogawa, Takahiro and Hirata, Kenji and Tang, Minghui and Yoshimura, Takaaki and Sugimori, Hiroyuki and Nishioka, Noriko and Shimizu, Yukie and Kudo, Kohsuke and Haseyama, Miki},
  title     = {Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42701--42710},
}
Free-Grained Hierarchical Visual Recognition: Seulki Park,

Zilin Wang,

Stella X. Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Seulki and Wang, Zilin and Yu, Stella X.},
  title     = {Free-Grained Hierarchical Visual Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32767--32776},
}
KV-Tracker: Real-Time Pose Tracking with Transformers: Marwan Taher,

Ignacio Alzugaray,

Kirill Mazur,

Xin Kong,

Andrew Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taher_2026_CVPR,
  author    = {Taher, Marwan and Alzugaray, Ignacio and Mazur, Kirill and Kong, Xin and Davison, Andrew},
  title     = {{KV-Tracker}: Real-Time Pose Tracking with Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28990--28999},
}
Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE): Gustavo Chau Loo Kung,

Mohammad Abbasi,

Camila Blank,

Juze Zhang,

Alan Q. Wang,

Sophie Ostmeier,

Akshay Chaudhari,

Kilian Pohl,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kung_2026_CVPR,
  author    = {Kung, Gustavo Chau Loo and Abbasi, Mohammad and Blank, Camila and Zhang, Juze and Wang, Alan Q. and Ostmeier, Sophie and Chaudhari, Akshay and Pohl, Kilian and Adeli, Ehsan},
  title     = {Diffusion {MRI} Transformer with a Diffusion Space Rotary Positional Embedding ({D-RoPE})},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38430--38441},
}
M4V: Multimodal Mamba for Efficient Text-to-Video Generation: Jiancheng Huang,

Gengwei Zhang,

Zequn Jie,

Siyu Jiao,

Yinlong Qian,

Ling Chen,

Yunchao Wei,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiancheng and Zhang, Gengwei and Jie, Zequn and Jiao, Siyu and Qian, Yinlong and Chen, Ling and Wei, Yunchao and Ma, Lin},
  title     = {{M4V}: Multimodal {Mamba} for Efficient Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36747--36757},
}
TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective: Chaohu Liu,

Shida Wang,

Yubo Wang,

Linli Xu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chaohu and Wang, Shida and Wang, Yubo and Xu, Linli},
  title     = {{TableMix}: Enhancing Multimodal Table Reasoning in {MLLMs} from a Data-Centric Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33553--33565},
}
IntrinsicWeather: Controllable Weather Editing in Intrinsic Space: Yixin Zhu,

Zuo-Liang Zhu,

Jian Yang,

Miloš Hašan,

Jin Xie,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yixin and Zhu, Zuo-Liang and Yang, Jian and Ha{\v{s}}an, Milo{\v{s}} and Xie, Jin and Wang, Beibei},
  title     = {{IntrinsicWeather}: Controllable Weather Editing in Intrinsic Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30772--30781},
}
Information-Theoretic Decomposition for Multimodal Interaction Learning: Zequn Yang,

Yake Wei,

Haotian Ni,

Zhihao Xu,

Di Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zequn and Wei, Yake and Ni, Haotian and Xu, Zhihao and Hu, Di},
  title     = {Information-Theoretic Decomposition for Multimodal Interaction Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30278--30287},
}
Learnability-Guided Diffusion for Dataset Distillation: Jeffrey A. Chan-Santiago,

Mubarak Shah; [pdf] [supp]
[bibtex]
@InProceedings{Chan-Santiago_2026_CVPR,
  author    = {Chan-Santiago, Jeffrey A. and Shah, Mubarak},
  title     = {Learnability-Guided Diffusion for Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41657--41666},
}
Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models: Qifan Li,

Xingyu Zhou,

Jinhua Zhang,

Weiyi You,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qifan and Zhou, Xingyu and Zhang, Jinhua and You, Weiyi and Gu, Shuhang},
  title     = {Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43643--43652},
}
Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation: Yuanfan Zheng,

Kunyu Peng,

Xu Zheng,

Kailun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yuanfan and Peng, Kunyu and Zheng, Xu and Yang, Kailun},
  title     = {Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42115--42125},
}
PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting: Yixiao Song,

Qingyong Li,

Wen Wang,

Zhicheng Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Yixiao and Li, Qingyong and Wang, Wen and Yan, Zhicheng},
  title     = {{PointGS}: Semantic-Consistent Unsupervised {3D} Point Cloud Segmentation with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33343--33352},
}
Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping: Tianxiang Du,

Hulingxiao He,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Tianxiang and He, Hulingxiao and Peng, Yuxin},
  title     = {Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37766--37776},
}
IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics: Yihang Duan,

Shuo Huang,

Li Zhang,

Meiling Wang,

Li Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Yihang and Huang, Shuo and Zhang, Li and Wang, Meiling and Zhang, Li},
  title     = {{IEBGL}: An Interpretability-Enhanced Brain Graph Learning Framework with {LLM-Instructed} Topology and Literature-Augmented Semantics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35331--35340},
}
Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning: Zhenyu Zhang,

Yixiong Zou,

Yuhua Li,

Ruixuan Li,

Guangyao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhenyu and Zou, Yixiong and Li, Yuhua and Li, Ruixuan and Chen, Guangyao},
  title     = {Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36978--36988},
}
Progressive Supernet Training for Efficient Visual Autoregressive Modeling: Xiaoyue Chen,

Yuling Shi,

Kaiyuan Li,

Huandong Wang,

Yong Li,

Xiaodong Gu,

Xinlei Chen,

Mingbao Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiaoyue and Shi, Yuling and Li, Kaiyuan and Wang, Huandong and Li, Yong and Gu, Xiaodong and Chen, Xinlei and Lin, Mingbao},
  title     = {Progressive Supernet Training for Efficient Visual Autoregressive Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37950--37959},
}
Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models: Yuanbo Li,

Tianyang Xu,

Cong Hu,

Tao Zhou,

Xiao-Jun Wu,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef},
  title     = {Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30065--30075},
}
Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation: Chenfeng Yin,

De Cheng,

Wenlong Luo,

Mingyue Zeng,

Shizhou Zhang,

Nannan Wang,

Xinbo Gao; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Chenfeng and Cheng, De and Luo, Wenlong and Zeng, Mingyue and Zhang, Shizhou and Wang, Nannan and Gao, Xinbo},
  title     = {Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39935--39944},
}
Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos: Matthew Strong,

Wei-Jer Chang,

Quentin Herau,

Jiezhi Yang,

Yihan Hu,

Chensheng Peng,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Strong_2026_CVPR,
  author    = {Strong, Matthew and Chang, Wei-Jer and Herau, Quentin and Yang, Jiezhi and Hu, Yihan and Peng, Chensheng and Zhan, Wei},
  title     = {Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32144--32153},
}
TransPrune: Token Transition Pruning for Efficient Large Vision-Language Model: Ao Li,

Yuxiang Duan,

Jinghui Zhang,

Congbo Ma,

Yutong Xie,

Gustavo Carneiro,

Mohammad Yaqub,

Hu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ao and Duan, Yuxiang and Zhang, Jinghui and Ma, Congbo and Xie, Yutong and Carneiro, Gustavo and Yaqub, Mohammad and Wang, Hu},
  title     = {{TransPrune}: Token Transition Pruning for Efficient Large Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39529--39538},
}
SEA-Flow3D: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement: Han Ling,

Quansen Sun,

Yinghua Yao,

Ivor Tsang,

Yinghui Sun; [pdf] [supp]
[bibtex]
@InProceedings{Ling_2026_CVPR,
  author    = {Ling, Han and Sun, Quansen and Yao, Yinghua and Tsang, Ivor and Sun, Yinghui},
  title     = {{SEA-Flow3D}: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36475--36484},
}
SAMTok: Representing Any Mask with Two Words: Yikang Zhou,

Tao Zhang,

Dengxian Gong,

Yuanzheng Wu,

Ye Tian,

Haochen Wang,

Haobo Yuan,

Jiacong Wang,

Lu Qi,

Hao Fei,

Shunping Ji,

Anran Wang,

Zhuochen Wang,

Yujing Wang,

Cheng Chen,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yikang and Zhang, Tao and Gong, Dengxian and Wu, Yuanzheng and Tian, Ye and Wang, Haochen and Yuan, Haobo and Wang, Jiacong and Qi, Lu and Fei, Hao and Ji, Shunping and Wang, Anran and Wang, Zhuochen and Wang, Yujing and Chen, Cheng and Li, Xiangtai},
  title     = {{SAMTok}: Representing Any Mask with Two Words},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37852--37863},
}
Iris: Integrating Language into Diffusion-based Monocular Depth Estimation: Ziyao Zeng,

Jingcheng Ni,

Daniel Wang,

Patrick Rim,

Younjoon Chung,

Fengyu Yang,

Byung-Woo Hong,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Ziyao and Ni, Jingcheng and Wang, Daniel and Rim, Patrick and Chung, Younjoon and Yang, Fengyu and Hong, Byung-Woo and Wong, Alex},
  title     = {Iris: Integrating Language into Diffusion-based Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34193--34205},
}
DecoVLN: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation: Zihao Xin,

Wentong Li,

Yixuan Jiang,

Bin Wang,

Runmin Cong,

Jie Qin,

Shengjun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Zihao and Li, Wentong and Jiang, Yixuan and Wang, Bin and Cong, Runmin and Qin, Jie and Huang, Shengjun},
  title     = {{DecoVLN}: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32410--32420},
}
SafeLogo: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training: Zhiyi Duan,

Xiaoyue Zhang,

Tianxing Man; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Zhiyi and Zhang, Xiaoyue and Man, Tianxing},
  title     = {{SafeLogo}: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37611--37620},
}
Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer: Mohsen Ghafoorian,

Denis Korzhenkov,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghafoorian_2026_CVPR,
  author    = {Ghafoorian, Mohsen and Korzhenkov, Denis and Habibian, Amirhossein},
  title     = {Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32915--32925},
}
VIRO: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension: Hyejin Park,

Junhyuk Kwon,

Suha Kwak,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyejin and Kwon, Junhyuk and Kwak, Suha and Ok, Jungseul},
  title     = {{VIRO}: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33426--33435},
}
Better, Stronger, Faster: Tackling the Trilemma in MLLM-based Segmentation with Simultaneous Textual Mask Prediction: Jiazhen Liu,

Mingkuan Feng,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiazhen and Feng, Mingkuan and Chen, Long},
  title     = {Better, Stronger, Faster: Tackling the Trilemma in {MLLM-based} Segmentation with Simultaneous Textual Mask Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33121--33130},
}
Mirror Illusion Art: Xiaopei Zhu,

Zeyuan Li,

Jun Zhu,

Xiaolin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xiaopei and Li, Zeyuan and Zhu, Jun and Hu, Xiaolin},
  title     = {Mirror Illusion Art},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31577--31585},
}
DiffusionHarmonizer: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer: Yuxuan Zhang,

Katarína Tóthová,

Zian Wang,

Kangxue Yin,

Haithem Turki,

Riccardo de Lutio,

Yen-Yu Chang,

Or Litany,

Sanja Fidler,

Zan Gojcic; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuxuan and T{\'o}thov{\'a}, Katar{\'\i}na and Wang, Zian and Yin, Kangxue and Turki, Haithem and de Lutio, Riccardo and Chang, Yen-Yu and Litany, Or and Fidler, Sanja and Gojcic, Zan},
  title     = {{DiffusionHarmonizer}: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43494--43504},
}
Parallel Rigidity Matters for Bundle Adjustment: Lalit Manam,

Venu Madhav Govindu; [pdf] [supp]
[bibtex]
@InProceedings{Manam_2026_CVPR,
  author    = {Manam, Lalit and Govindu, Venu Madhav},
  title     = {Parallel Rigidity Matters for Bundle Adjustment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29035--29046},
}
BiPreManip: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration: Yan Shen,

Feng Jiang,

Zichen He,

Xiaoqi Li,

Yuchen Liu,

Zhiyu Li,

Ruihai Wu,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Yan and Jiang, Feng and He, Zichen and Li, Xiaoqi and Liu, Yuchen and Li, Zhiyu and Wu, Ruihai and Dong, Hao},
  title     = {{BiPreManip}: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42430--42440},
}
RegionRoute: Regional Style Transfer with Diffusion Model: Bowen Chen,

Jake Zuena,

Alan C. Bovik,

Divya Kothandaraman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Bowen and Zuena, Jake and Bovik, Alan C. and Kothandaraman, Divya},
  title     = {{RegionRoute}: Regional Style Transfer with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35736--35746},
}
Merge3D: Efficient 3D Multimodal LLMs via Joint 2D-3D Token Merging: Tianbo Pan,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Tianbo and Yang, Xingyi and Wang, Xinchao},
  title     = {{Merge3D}: Efficient {3D} Multimodal {LLMs} via Joint {2D-3D} Token Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31066--31077},
}
IMAIA: Interactive Maps AI Assistant for Travel Planning and Geo-Spatial Intelligence: Jieren Deng,

Zhizhang Hu,

Ziyan He,

Aleksandar Cvetkovic,

Pak Kiu Chung,

Dragomir Yankov,

Chiqun Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Jieren and Hu, Zhizhang and He, Ziyan and Cvetkovic, Aleksandar and Chung, Pak Kiu and Yankov, Dragomir and Zhang, Chiqun},
  title     = {{IMAIA}: Interactive Maps {AI} Assistant for Travel Planning and Geo-Spatial Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40064--40073},
}
ActiveGrasp: Information-Guided Active Grasping with Calibrated Energy-based Model: Boshu Lei,

Wen Jiang,

Kostas Daniilidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Boshu and Jiang, Wen and Daniilidis, Kostas},
  title     = {{ActiveGrasp}: Information-Guided Active Grasping with Calibrated Energy-based Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42418--42429},
}
Bridging Privacy and Provenance: Traceable Virtual Identity Generation: Xianhan Zeng,

Xiaoxiao Hu,

Sheng Li,

Zhenxing Qian,

Xinpeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Xianhan and Hu, Xiaoxiao and Li, Sheng and Qian, Zhenxing and Zhang, Xinpeng},
  title     = {Bridging Privacy and Provenance: Traceable Virtual Identity Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32367--32376},
}
Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising: Zhan Wang,

Leiquan Wang,

Chunlei Wu,

Yu Meng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhan and Wang, Leiquan and Wu, Chunlei and Meng, Yu},
  title     = {Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29929--29938},
}
LEMON: A Large Endoscopic MONocular Dataset and Foundation Model for Perception in Surgical Settings: Chengan Che,

Chao Wang,

Tom Vercauteren,

Sophia Tsoka,

Luis C. Garcia-Peraza-Herrera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
  author    = {Che, Chengan and Wang, Chao and Vercauteren, Tom and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.},
  title     = {{LEMON}: A Large Endoscopic {MONocular} Dataset and Foundation Model for Perception in Surgical Settings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42659--42669},
}
FairLLaVA: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants: Mahesh Bhosale,

Abdul Wasi,

Shantam Srivastava,

Shifa Latif,

Tianyu Luan,

Mingchen Gao,

David Doermann,

Xuan Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhosale_2026_CVPR,
  author    = {Bhosale, Mahesh and Wasi, Abdul and Srivastava, Shantam and Latif, Shifa and Luan, Tianyu and Gao, Mingchen and Doermann, David and Gong, Xuan},
  title     = {{FairLLaVA}: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42615--42625},
}
Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling: Jiaxuan Xu,

Lei Duan,

Xinye Wang,

Liang Du; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiaxuan and Duan, Lei and Wang, Xinye and Du, Liang},
  title     = {Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39691--39700},
}
Event-Based Motion Deblurring Using Task-Oriented 3D Gaussian Event Representations: Shengdong Xue,

Haoxiang Ma,

Hao Chen,

Zhen Yang,

Yongjian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Shengdong and Ma, Haoxiang and Chen, Hao and Yang, Zhen and Deng, Yongjian},
  title     = {Event-Based Motion Deblurring Using Task-Oriented {3D} {Gaussian} Event Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29547--29556},
}
IsoCLIP: Decomposing CLIP Projectors for Efficient Intra-modal Alignment: Simone Magistri,

Dipam Goswami,

Marco Mistretta,

Bartłomiej Twardowski,

Joost van de Weijer,

Andrew D. Bagdanov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magistri_2026_CVPR,
  author    = {Magistri, Simone and Goswami, Dipam and Mistretta, Marco and Twardowski, Bart{\l}omiej and van de Weijer, Joost and Bagdanov, Andrew D.},
  title     = {{IsoCLIP}: Decomposing {CLIP} Projectors for Efficient Intra-modal Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29315--29324},
}
RISE: Single Static Radar-based Indoor Scene Understanding: Kaichen Zhou,

Laura Dodds,

Sayed Saad Afzal,

Fadel Adib; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Kaichen and Dodds, Laura and Afzal, Sayed Saad and Adib, Fadel},
  title     = {{RISE}: Single Static Radar-based Indoor Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32194--32205},
}
FlowDIS: Language-Guided Dichotomous Image Segmentation with Flow Matching: Andranik Sargsyan,

Shant Navasardyan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargsyan_2026_CVPR,
  author    = {Sargsyan, Andranik and Navasardyan, Shant},
  title     = {{FlowDIS}: Language-Guided Dichotomous Image Segmentation with Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42039--42048},
}
OmniFood8K: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion: Dongjian Yu,

Weiqing Min,

Qian Jiang,

Xing Lin,

Xin Jin,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Dongjian and Min, Weiqing and Jiang, Qian and Lin, Xing and Jin, Xin and Jiang, Shuqiang},
  title     = {{OmniFood8K}: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41562--41572},
}
SketchFaceGS: Real-Time Sketch-Driven Face Editing and Generation with Gaussian Splatting: Bo Li,

Jiahao Kang,

Yubo Ma,

Feng-Lin Liu,

Bin Liu,

Fang-Lue Zhang,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bo and Kang, Jiahao and Ma, Yubo and Liu, Feng-Lin and Liu, Bin and Zhang, Fang-Lue and Gao, Lin},
  title     = {{SketchFaceGS}: Real-Time Sketch-Driven Face Editing and Generation with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40020--40030},
}
DeepfakeImpact: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection: Chaoyu Gong,

Han Zhang,

Siqiang Luo; [pdf]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Chaoyu and Zhang, Han and Luo, Siqiang},
  title     = {{DeepfakeImpact}: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35451--35461},
}
Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery: Jizhou Han,

Chenhao Ding,

Yuhang He,

Qiang Wang,

Shaokun Wang,

SongLin Dong,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jizhou and Ding, Chenhao and He, Yuhang and Wang, Qiang and Wang, Shaokun and Dong, SongLin and Gong, Yihong},
  title     = {Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28734--28743},
}
Logit-Margin Repulsion for Backdoor Defense: Zhiguo Yang,

Dongsheng Xu,

Ruizhi Zhong,

Jiacheng Pi,

Xingxing Huang,

Wenjie Ruan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhiguo and Xu, Dongsheng and Zhong, Ruizhi and Pi, Jiacheng and Huang, Xingxing and Ruan, Wenjie},
  title     = {Logit-Margin Repulsion for Backdoor Defense},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34918--34928},
}
Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization: Zhuohan Liu,

Wujian Peng,

Yitong Chen,

Zuxuan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhuohan and Peng, Wujian and Chen, Yitong and Wu, Zuxuan},
  title     = {Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36604--36614},
}
Agentic Video Summarization via Self-Reflecting Multimodal Understanding: Miaotian Guo,

Shuguang Dou,

Yin Li,

Aidong Men,

Dongsheng Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Miaotian and Dou, Shuguang and Li, Yin and Men, Aidong and Jiang, Dongsheng},
  title     = {Agentic Video Summarization via Self-Reflecting Multimodal Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40516--40526},
}
Easy2Hard: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning: Zhicheng Yang,

Yichen Liu,

Chang Ge,

Xiaopeng Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhicheng and Liu, Yichen and Ge, Chang and Jiang, Xiaopeng},
  title     = {{Easy2Hard}: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30226--30234},
}
Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels: Junda Xu,

Yanmeng Liu,

Xiangqiang Zeng,

Jinrong Wu,

Ying Qu,

Libao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Junda and Liu, Yanmeng and Zeng, Xiangqiang and Wu, Jinrong and Qu, Ying and Zhang, Libao},
  title     = {Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34854--34864},
}
InterPhys: Physics-aware Human Motion Synthesis in a Dynamic Scene: Chaoyue Xing,

Wei Mao,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Chaoyue and Mao, Wei and Liu, Miaomiao},
  title     = {{InterPhys}: Physics-aware Human Motion Synthesis in a Dynamic Scene},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30729--30739},
}
Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning: Yuhua Wang,

Qinnan Zhang,

Xiaodong Li,

Huan Zhang,

Yifan Sun,

Wangjie Qiu,

Hainan Zhang,

Yongxin Tong,

Zhiming Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuhua and Zhang, Qinnan and Li, Xiaodong and Zhang, Huan and Sun, Yifan and Qiu, Wangjie and Zhang, Hainan and Tong, Yongxin and Zheng, Zhiming},
  title     = {Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39414--39424},
}
Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception: Yihang Tao,

Senkang Hu,

Haonan An,

Zhengru Fang,

Hangcheng Cao,

Yuguang Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Yihang and Hu, Senkang and An, Haonan and Fang, Zhengru and Cao, Hangcheng and Fang, Yuguang},
  title     = {Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42321--42330},
}
ActAvatar: Temporally-Aware Precise Action Control for Talking Avatars: Ziqiao Peng,

Yi Chen,

Yifeng Ma,

Guozhen Zhang,

Zhiyao Sun,

Zixiang Zhou,

Youliang Zhang,

Zhengguang Zhou,

Zhaoxin Fan,

Hongyan Liu,

Yuan Zhou,

Qinglin Lu,

Jun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ziqiao and Chen, Yi and Ma, Yifeng and Zhang, Guozhen and Sun, Zhiyao and Zhou, Zixiang and Zhang, Youliang and Zhou, Zhengguang and Fan, Zhaoxin and Liu, Hongyan and Zhou, Yuan and Lu, Qinglin and He, Jun},
  title     = {{ActAvatar}: Temporally-Aware Precise Action Control for Talking Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39983--39993},
}
Enhancing Video Vision Language Model with Hippocampal Sensing: Xu Cao; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Xu},
  title     = {Enhancing Video Vision Language Model with Hippocampal Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33682--33692},
}
EgoMind: Activating Spatial Cognition through Linguistic Reasoning in MLLMs: Zhenghao Chen,

Huiqun Wang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhenghao and Wang, Huiqun and Huang, Di},
  title     = {{EgoMind}: Activating Spatial Cognition through Linguistic Reasoning in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38616--38626},
}
SPREAD: Spatial-Physical REasoning via geometry Aware Diffusion: Minzhang Li,

Kuixiang Shao,

Xuebing Li,

Yuyang Jiao,

Yinuo Bai,

Hengan Zhou,

Sixian Shen,

Jiayuan Gu,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Minzhang and Shao, Kuixiang and Li, Xuebing and Jiao, Yuyang and Bai, Yinuo and Zhou, Hengan and Shen, Sixian and Gu, Jiayuan and Yu, Jingyi},
  title     = {{SPREAD}: Spatial-Physical {REasoning} via geometry Aware Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31008--31018},
}
SHands: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training: Le Ma,

Thiago Freitas dos Santos,

Nadia Magnenat-Thalmann,

Katarzyna Wac; [pdf]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Le and dos Santos, Thiago Freitas and Magnenat-Thalmann, Nadia and Wac, Katarzyna},
  title     = {{SHands}: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42879--42890},
}
Changes in Real Time: Online Scene Change Detection with Multi-View Fusion: Chamuditha Jayanga Galappaththige,

Jason Lai,

Lloyd Windrim,

Donald Dansereau,

Niko Suenderhauf,

Dimity Miller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galappaththige_2026_CVPR,
  author    = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Suenderhauf, Niko and Miller, Dimity},
  title     = {Changes in Real Time: Online Scene Change Detection with Multi-View Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32246--32256},
}
Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation: Xinshun Wang,

Peiming Li,

Ziyi Wang,

Zhongbin Fang,

Zhichao Deng,

Songtao Wu,

Jason Li,

Mengyuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinshun and Li, Peiming and Wang, Ziyi and Fang, Zhongbin and Deng, Zhichao and Wu, Songtao and Li, Jason and Liu, Mengyuan},
  title     = {Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38335--38344},
}
Controllable Federated Prompt Learning at Test Time: Rui Zhu,

Liang Bai,

Yanming Guo,

Yirun Ruan,

Tianyuan Yu,

Zhihe Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Rui and Bai, Liang and Guo, Yanming and Ruan, Yirun and Yu, Tianyuan and Lu, Zhihe},
  title     = {Controllable Federated Prompt Learning at Test Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39455--39465},
}
Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering: Sixian Wang,

Zhiwei Tang,

Tsung-Hui Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sixian and Tang, Zhiwei and Chang, Tsung-Hui},
  title     = {Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36030--36039},
}
Group Editing: Edit Multiple Images in One Go: Yue Ma,

Xinyu Wang,

Qianli Ma,

Qinghe Wang,

Mingzhe Zheng,

Xiangpeng Yang,

Hao Li,

Chongbo Zhao,

Jixuan Ying,

Harry Yang,

Hongyu Liu,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yue and Wang, Xinyu and Ma, Qianli and Wang, Qinghe and Zheng, Mingzhe and Yang, Xiangpeng and Li, Hao and Zhao, Chongbo and Ying, Jixuan and Yang, Harry and Liu, Hongyu and Chen, Qifeng},
  title     = {Group Editing: Edit Multiple Images in One Go},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43418--43428},
}
HERO: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos: Tingting Han,

Xinsong Tao,

Yufei Yin,

Min Tan,

Sicheng Zhao,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Tingting and Tao, Xinsong and Yin, Yufei and Tan, Min and Zhao, Sicheng and Yu, Zhou},
  title     = {{HERO}: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31867--31876},
}
Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision: Aadarsh Sahoo,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sahoo_2026_CVPR,
  author    = {Sahoo, Aadarsh and Gkioxari, Georgia},
  title     = {Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39476--39485},
}
Demo2Tutorial: From Human Experience to Multimodal Software Tutorials: Zechen Bai,

Zhiheng Chen,

Yiqi Lin,

Kevin Qinghong Lin,

Difei Gao,

Xiangwu Guo,

Xin Wang,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Zechen and Chen, Zhiheng and Lin, Yiqi and Lin, Kevin Qinghong and Gao, Difei and Guo, Xiangwu and Wang, Xin and Shou, Mike Zheng},
  title     = {{Demo2Tutorial}: From Human Experience to Multimodal Software Tutorials},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29588--29597},
}
Reliable Clustering Number Estimation for Contrastive Multi-View Clustering: Zhengzhong Zhu,

Pei Zhou,

Lanxi Bai,

Li Cheng,

Jia Nie,

Shiquan Min,

Jiangping Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Cheng, Li and Nie, Jia and Min, Shiquan and Zhu, Jiangping},
  title     = {Reliable Clustering Number Estimation for Contrastive Multi-View Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30162--30171},
}
F$^2$-Assist: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination: Bin Pu,

Xusheng Liang,

Xinpeng Ding,

Jinlin Wu,

Zhen Lei,

Shengli Li,

Kenli Li,

Jiawei Ma; [pdf]
[bibtex]
@InProceedings{Pu_2026_CVPR,
  author    = {Pu, Bin and Liang, Xusheng and Ding, Xinpeng and Wu, Jinlin and Lei, Zhen and Li, Shengli and Li, Kenli and Ma, Jiawei},
  title     = {{F$^2$-Assist}: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35341--35350},
}
Online3R: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model: Shunkai Zhou,

Zike Yan,

Fei Xue,

Dong Wu,

Yuchen Deng,

Hongbin Zha; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Shunkai and Yan, Zike and Xue, Fei and Wu, Dong and Deng, Yuchen and Zha, Hongbin},
  title     = {{Online3R}: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36529--36538},
}
Linking Modality Isolation in Heterogeneous Collaborative Perception: Changxing Liu,

Zichen Chao,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Changxing and Chao, Zichen and Chen, Siheng},
  title     = {Linking Modality Isolation in Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39765--39774},
}
Expand and Prune: Maximizing Trajectory Diversity for Effective GRPO in Generative Models: Shiran Ge,

Chenyi Huang,

Yuang Ai,

Qihang Fan,

Huaibo Huang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Shiran and Huang, Chenyi and Ai, Yuang and Fan, Qihang and Huang, Huaibo and He, Ran},
  title     = {Expand and Prune: Maximizing Trajectory Diversity for Effective {GRPO} in Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41913--41922},
}
Disentangled Textual Priors for Diffusion-based Image Super-Resolution: Lei Jiang,

Xin Liu,

Xinze Tong,

Zhiliang Li,

Jie Liu,

Jie Tang,

Gangshan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Lei and Liu, Xin and Tong, Xinze and Li, Zhiliang and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {Disentangled Textual Priors for Diffusion-based Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38228--38237},
}
NG-GS: NeRF-guided 3D Gaussian Splatting Segmentation: Yi He,

Tao Wang,

Yi Jin,

Congyan Lang,

Yidong Li,

Haibin Ling; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yi and Wang, Tao and Jin, Yi and Lang, Congyan and Li, Yidong and Ling, Haibin},
  title     = {{NG-GS}: {NeRF}-guided {3D} {Gaussian} Splatting Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42061--42070},
}
Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation: Yuqing Huang,

Guotian Zeng,

Zhenqiao Yuan,

Zhenyu He,

Xin Li,

Yaowei Wang,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuqing and Zeng, Guotian and Yuan, Zhenqiao and He, Zhenyu and Li, Xin and Wang, Yaowei and Yang, Ming-Hsuan},
  title     = {Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35166--35176},
}
Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios: Yu Shi,

Yu Liu,

Zhong-Cheng Wu,

Juan Cheng,

Huafeng Li,

Xun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yu and Liu, Yu and Wu, Zhong-Cheng and Cheng, Juan and Li, Huafeng and Chen, Xun},
  title     = {Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33848--33858},
}
TUDSR: Twice Upsampling-Diffusion for Higher Super-Resolution: Zhiqiang Wu,

Yitong Dong,

Xian Wei; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhiqiang and Dong, Yitong and Wei, Xian},
  title     = {{TUDSR}: Twice Upsampling-Diffusion for Higher Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38208--38217},
}
TriLite: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement: Arian Sabaghi,

Jose Oramas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sabaghi_2026_CVPR,
  author    = {Sabaghi, Arian and Oramas, Jose},
  title     = {{TriLite}: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41386--41395},
}
VLM-3R: Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction: Zhiwen Fan,

Jian Zhang,

Renjie Li,

Junge Zhang,

Runjin Chen,

Hezhen Hu,

Kevin Wang,

Peihao Wang,

Huaizhi Qu,

Shijie Zhou,

Dilin Wang,

Zhicheng Yan,

Hongyu Xu,

Justin Theiss,

Tianlong Chen,

Jiachen Li,

Zhengzhong Tu,

Zhangyang Wang,

Rakesh Ranjan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Zhiwen and Zhang, Jian and Li, Renjie and Zhang, Junge and Chen, Runjin and Hu, Hezhen and Wang, Kevin and Wang, Peihao and Qu, Huaizhi and Zhou, Shijie and Wang, Dilin and Yan, Zhicheng and Xu, Hongyu and Theiss, Justin and Chen, Tianlong and Li, Jiachen and Tu, Zhengzhong and Wang, Zhangyang and Ranjan, Rakesh},
  title     = {{VLM-3R}: Vision-Language Models Augmented with Instruction-Aligned {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31054--31065},
}
Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation: Shilong Li,

Xiurui Xie,

Qiugang Zhan,

Luochao Wang,

Yong Deng,

Guisong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shilong and Xie, Xiurui and Zhan, Qiugang and Wang, Luochao and Deng, Yong and Liu, Guisong},
  title     = {Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34575--34585},
}
FreeForm: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes: Donglai Xiang,

Vismay Modi,

Rishit Dagli,

Ty Trusty,

Gilles Daviet,

Anka He Chen,

Nicholas Sharp,

David I. W. Levin; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Donglai and Modi, Vismay and Dagli, Rishit and Trusty, Ty and Daviet, Gilles and Chen, Anka He and Sharp, Nicholas and Levin, David I. W.},
  title     = {{FreeForm}: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32475--32484},
}
LEAD: Minimizing Learner-Expert Asymmetry in End-to-End Driving: Long Nguyen,

Micha Fauth,

Bernhard Jaeger,

Daniel Dauner,

Maximilian Igl,

Andreas Geiger,

Kashyap Chitta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Long and Fauth, Micha and Jaeger, Bernhard and Dauner, Daniel and Igl, Maximilian and Geiger, Andreas and Chitta, Kashyap},
  title     = {{LEAD}: Minimizing Learner-Expert Asymmetry in End-to-End Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39775--39785},
}
PromptLoop: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment: Suhyeon Lee,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Suhyeon and Ye, Jong Chul},
  title     = {{PromptLoop}: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41859--41869},
}
DIMOS: Disentangling Instance-level Moving Object Segmentation: Hongxiang Huang,

Hongwei Ren,

Xiaopeng Lin,

Yulong Huang,

Zeke Xie,

Bojun Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Hongxiang and Ren, Hongwei and Lin, Xiaopeng and Huang, Yulong and Xie, Zeke and Cheng, Bojun},
  title     = {{DIMOS}: Disentangling Instance-level Moving Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39806--39816},
}
PrITTI: Primitive-based Generation of Controllable and Editable 3D Semantic Urban Scenes: Christina Ourania Tze,

Daniel Dauner,

Yiyi Liao,

Dzmitry Tsishkou,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tze_2026_CVPR,
  author    = {Tze, Christina Ourania and Dauner, Daniel and Liao, Yiyi and Tsishkou, Dzmitry and Geiger, Andreas},
  title     = {{PrITTI}: Primitive-based Generation of Controllable and Editable {3D} Semantic Urban Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32613--32624},
}
DSCA: Dynamic Subspace Concept Alignment for Lifelong VLM Editing: Gyanendra Das,

Sai Jena; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2026_CVPR,
  author    = {Das, Gyanendra and Jena, Sai},
  title     = {{DSCA}: Dynamic Subspace Concept Alignment for Lifelong {VLM} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40772--40781},
}
BiGain: Unified Token Compression for Joint Generation and Classification: Jiacheng Liu,

Shengkun Tang,

Jiacheng Cui,

Dongkuan Xu,

Zhiqiang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiacheng and Tang, Shengkun and Cui, Jiacheng and Xu, Dongkuan and Shen, Zhiqiang},
  title     = {{BiGain}: Unified Token Compression for Joint Generation and Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31931--31940},
}
HAWK: Head Importance-Aware Visual Token Pruning in Multimodal Models: Qihui Zhu,

Tao Zhang,

Yuchen Wang,

Shuangwu Chen,

Xiaobin Tan,

Jian Yang,

Yang Liu,

Yinfei Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Qihui and Zhang, Tao and Wang, Yuchen and Chen, Shuangwu and Tan, Xiaobin and Yang, Jian and Liu, Yang and Pan, Yinfei},
  title     = {{HAWK}: Head Importance-Aware Visual Token Pruning in Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39583--39592},
}
EthoCLIP: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding: Yinuo Jing,

Jinyan Wu,

Zixi Yang,

Kongming Liang,

Xiatian Zhu,

Zhanyu Ma; [pdf] [supp]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Yinuo and Wu, Jinyan and Yang, Zixi and Liang, Kongming and Zhu, Xiatian and Ma, Zhanyu},
  title     = {{EthoCLIP}: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31196--31206},
}
MapReduce LoRA: Advancing the Pareto Front in Multi-Preference Optimization for Generative Models: Chieh-Yun Chen,

Zhonghao Wang,

Qi Chen,

Zhifan Ye,

Min Shi,

Yue Zhao,

Yinan Zhao,

Hui Qu,

Wei-An Lin,

Yiru Shen,

Ajinkya Kale,

Irfan Essa,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chieh-Yun and Wang, Zhonghao and Chen, Qi and Ye, Zhifan and Shi, Min and Zhao, Yue and Zhao, Yinan and Qu, Hui and Lin, Wei-An and Shen, Yiru and Kale, Ajinkya and Essa, Irfan and Shi, Humphrey},
  title     = {{MapReduce} {LoRA}: Advancing the {Pareto} Front in Multi-Preference Optimization for Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34375--34384},
}
Agentic Retoucher for Text-To-Image Generation: Shaocheng Shen,

Jianfeng Liang,

Chunlei Cai,

Cong Geng,

Huiyu Duan,

Xiaoyun Zhang,

Qiang Hu,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Shaocheng and Liang, Jianfeng and Cai, Chunlei and Geng, Cong and Duan, Huiyu and Zhang, Xiaoyun and Hu, Qiang and Zhai, Guangtao},
  title     = {Agentic Retoucher for Text-To-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29114--29125},
}
Sensor2Sensor: Cross-Embodiment Sensor Conversion for Autonomous Driving: Jiahao Wang,

Bo Sun,

Yijing Bai,

Vincent Casser,

Songyou Peng,

Zehao Zhu,

Meng-Li Shih,

Xander Masotto,

Shih-Yang Su,

Kanaad Parvate,

Tiancheng Ge,

Linn Bieske,

Dragomir Anguelov,

Mingxing Tan,

Chiyu Max Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Sun, Bo and Bai, Yijing and Casser, Vincent and Peng, Songyou and Zhu, Zehao and Shih, Meng-Li and Masotto, Xander and Su, Shih-Yang and Parvate, Kanaad and Ge, Tiancheng and Bieske, Linn and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max},
  title     = {{Sensor2Sensor}: Cross-Embodiment Sensor Conversion for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32093--32102},
}
RL-ScanIQA: Reinforcement-Learned Scanpaths for Blind 360° Image Quality Assessment: Yujia Wang,

Yuyan Li,

Jiuming Liu,

Fang-Lue Zhang,

Xinhu Zheng,

Neil A. Dodgson; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yujia and Li, Yuyan and Liu, Jiuming and Zhang, Fang-Lue and Zheng, Xinhu and Dodgson, Neil A.},
  title     = {{RL-ScanIQA}: Reinforcement-Learned Scanpaths for Blind {$360^{\circ}$} Image Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37401--37412},
}
Wavelet-Driven 3D Anomaly Detection under Pose-Agnostic and Sparse-View: Mingwen Shao,

Qiao Zhang,

Xinyuan Chen,

Xiang Lv,

Lingzhuang Meng,

Chang Liu,

Qinglin Zhan,

Ling Jian; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Mingwen and Zhang, Qiao and Chen, Xinyuan and Lv, Xiang and Meng, Lingzhuang and Liu, Chang and Zhan, Qinglin and Jian, Ling},
  title     = {Wavelet-Driven {3D} Anomaly Detection under Pose-Agnostic and Sparse-View},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43083--43092},
}
OmniLottie: Generating Vector Animations via Parameterized Lottie Tokens: Yiying Yang,

Wei Cheng,

Sijin Chen,

Honghao Fu,

Xianfang Zeng,

Yujun Cai,

Gang Yu,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yiying and Cheng, Wei and Chen, Sijin and Fu, Honghao and Zeng, Xianfang and Cai, Yujun and Yu, Gang and Ma, Xingjun},
  title     = {{OmniLottie}: Generating Vector Animations via Parameterized {Lottie} Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39293--39303},
}
GeneVAR: Causal MeanFlow for Autoregressive Gene-to-WSI Tile Synthesis: Jianwei Zhao,

Fan Yang,

Xin Li,

Qiang Zhai,

Ao Luo,

Ziqi Ren,

Zhicheng Jiao,

Hong Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jianwei and Yang, Fan and Li, Xin and Zhai, Qiang and Luo, Ao and Ren, Ziqi and Jiao, Zhicheng and Cheng, Hong},
  title     = {{GeneVAR}: Causal {MeanFlow} for Autoregressive Gene-to-{WSI} Tile Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34116--34125},
}
Bilevel Layer-Positioning LoRA for Real Image Dehazing: Yan Zhang,

Long Ma,

Yuxin Feng,

Zhe Huang,

Fan Zhou,

Zhuo Su; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yan and Ma, Long and Feng, Yuxin and Huang, Zhe and Zhou, Fan and Su, Zhuo},
  title     = {Bilevel Layer-Positioning {LoRA} for Real Image Dehazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29969--29978},
}
TrajTok: Learning Trajectory Tokens Enhances Video Understanding: Chenhao Zheng,

Jieyu Zhang,

Jianing Zhang,

Weikai Huang,

Ashutosh Kumar,

Quan Kong,

Oncel Tuzel,

Chun-Liang Li,

Ranjay Krishna; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Chenhao and Zhang, Jieyu and Zhang, Jianing and Huang, Weikai and Kumar, Ashutosh and Kong, Quan and Tuzel, Oncel and Li, Chun-Liang and Krishna, Ranjay},
  title     = {{TrajTok}: Learning Trajectory Tokens Enhances Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31207--31218},
}
3D-VCD: Hallucination Mitigation in 3D-LLM Embodied Agents through Visual Contrastive Decoding: Makanjuola Adekunmi Ogunleye,

Eman Abdelrahman,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ogunleye_2026_CVPR,
  author    = {Ogunleye, Makanjuola Adekunmi and Abdelrahman, Eman and Lourentzou, Ismini},
  title     = {{3D-VCD}: Hallucination Mitigation in {3D-LLM} Embodied Agents through Visual Contrastive Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40197--40207},
}
SAT-RRG: LLM-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization: Yunyi Liu,

Yingshu Li,

Tong Chen,

Lingqiao Liu,

Lei Wang,

Luping Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yunyi and Li, Yingshu and Chen, Tong and Liu, Lingqiao and Wang, Lei and Zhou, Luping},
  title     = {{SAT-RRG}: {LLM}-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35363--35372},
}
dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models: Yi Xin,

Siqi Luo,

Tianxiang Xu,

Qi Qin,

Haoxing Chen,

Kaiwen Zhu,

Zhiwei Zhang,

Yangfan He,

Rongchao Zhang,

Jinbin Bai,

Shuo Cao,

Bin Fu,

Junjun He,

Yihao Liu,

Yuewen Cao,

Xiaohong Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Yi and Luo, Siqi and Xu, Tianxiang and Qin, Qi and Chen, Haoxing and Zhu, Kaiwen and Zhang, Zhiwei and He, Yangfan and Zhang, Rongchao and Bai, Jinbin and Cao, Shuo and Fu, Bin and He, Junjun and Liu, Yihao and Cao, Yuewen and Liu, Xiaohong},
  title     = {{dMLLM-TTS}: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35726--35735},
}
Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence: Kun Ouyang,

Yuanxin Liu,

Linli Yao,

Yishuo Cai,

Hao Zhou,

Fandong Meng,

Jie Zhou,

Xu Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Kun and Liu, Yuanxin and Yao, Linli and Cai, Yishuo and Zhou, Hao and Meng, Fandong and Zhou, Jie and Sun, Xu},
  title     = {Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41089--41099},
}
Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models: Jiawei Fan,

Shigeng Wang,

Chao Li,

Xiaolong Liu,

Anbang Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Jiawei and Wang, Shigeng and Li, Chao and Liu, Xiaolong and Yao, Anbang},
  title     = {Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34491--34501},
}
Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning: Yuxuan Gu,

Weimin Bai,

Yifei Wang,

Weijian Luo,

He Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Yuxuan and Bai, Weimin and Wang, Yifei and Luo, Weijian and Sun, He},
  title     = {Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41881--41891},
}
ZoomEarth: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks: Ruixun Liu,

Bowen Fu,

Jiayi Song,

Kaiyu Li,

Wanchen Li,

Lanxuan Xue,

Hui Qiao,

Weizhan Zhang,

Deyu Meng,

Xiangyong Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ruixun and Fu, Bowen and Song, Jiayi and Li, Kaiyu and Li, Wanchen and Xue, Lanxuan and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong},
  title     = {{ZoomEarth}: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34877--34888},
}
IAFMNet: Information-Aware Feature Modulation for Efficient Super-Resolution: Junwei Xu,

Mengzu Liu,

Zhenyu Wang,

Fangfang Wu,

Sijia Wu,

Tao Huang,

Weisheng Dong; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Junwei and Liu, Mengzu and Wang, Zhenyu and Wu, Fangfang and Wu, Sijia and Huang, Tao and Dong, Weisheng},
  title     = {{IAFMNet}: Information-Aware Feature Modulation for Efficient Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30564--30573},
}
PortraitDirector: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment: Chaonan Ji,

Jinwei Qi,

Sheng Xu,

Peng Zhang,

Bang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Chaonan and Qi, Jinwei and Xu, Sheng and Zhang, Peng and Zhang, Bang},
  title     = {{PortraitDirector}: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32377--32388},
}
FlashVSR: Towards Real-time Diffusion-Based Streaming Video Super Resolution: Junhao Zhuang,

Shi Guo,

Xin Cai,

Xiaohui Li,

Yihao Liu,

Chun Yuan,

Tianfan Xue; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Junhao and Guo, Shi and Cai, Xin and Li, Xiaohui and Liu, Yihao and Yuan, Chun and Xue, Tianfan},
  title     = {{FlashVSR}: Towards Real-time Diffusion-Based Streaming Video Super Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43482--43493},
}
R-C2: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning: Zirui Zhang,

Haoyu Dong,

Kexin Pei,

Chengzhi Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zirui and Dong, Haoyu and Pei, Kexin and Mao, Chengzhi},
  title     = {{R-C2}: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36893--36903},
}
Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization: Kai Wang,

Tao Zhou,

Jiayi Lei,

Jing Wang,

Jinman Zhao,

Weiguo Pian,

Yuan Cheng,

Yapeng Tian,

Peng Gao,

Bin Fu,

Yihao Liu,

Dimitrios Hatzinakos,

Yuewen Cao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kai and Zhou, Tao and Lei, Jiayi and Wang, Jing and Zhao, Jinman and Pian, Weiguo and Cheng, Yuan and Tian, Yapeng and Gao, Peng and Fu, Bin and Liu, Yihao and Hatzinakos, Dimitrios and Cao, Yuewen},
  title     = {Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43396--43406},
}
ORD: Object-Relation Decoupling for Generalized 3D Visual Grounding: Ronggang Huang,

Fansen Meng,

Huaidong Zhang,

Xuemiao Xu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Ronggang and Meng, Fansen and Zhang, Huaidong and Xu, Xuemiao},
  title     = {{ORD}: Object-Relation Decoupling for Generalized {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30964--30973},
}
UCMNet: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration: Daehyun Kim,

Youngmin Kim,

Yoon Ju Oh,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Daehyun and Kim, Youngmin and Oh, Yoon Ju and Kim, Tae Hyun},
  title     = {{UCMNet}: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29939--29948},
}
PARSE: Part-Aware Relational Spatial Modeling: Yinuo Bai,

Peijun Xu,

Kuixiang Shao,

Yuyang Jiao,

Jingxuan Zhang,

Kaixin Yao,

Jiayuan Gu,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Yinuo and Xu, Peijun and Shao, Kuixiang and Jiao, Yuyang and Zhang, Jingxuan and Yao, Kaixin and Gu, Jiayuan and Yu, Jingyi},
  title     = {{PARSE}: Part-Aware Relational Spatial Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38700--38710},
}
SDTrack: A Baseline for Event-based Tracking via Spiking Neural Networks: Yimeng Shan,

Zhenbang Ren,

Haodi Wu,

Wenjie Wei,

Rui-Jie Zhu,

Shuai Wang,

Dehao Zhang,

Yichen Xiao,

Jieyuan Zhang,

Kexin Shi,

Jingzhinan Wang,

Jason K. Eshraghian,

Haicheng Qu,

Malu Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Yimeng and Ren, Zhenbang and Wu, Haodi and Wei, Wenjie and Zhu, Rui-Jie and Wang, Shuai and Zhang, Dehao and Xiao, Yichen and Zhang, Jieyuan and Shi, Kexin and Wang, Jingzhinan and Eshraghian, Jason K. and Qu, Haicheng and Zhang, Malu},
  title     = {{SDTrack}: A Baseline for Event-based Tracking via Spiking Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36245--36254},
}
UniPercept: A Unified Diffusion Model for Generalizable Visual Perception: Zuyan Zhao,

Zhenliang He,

Meina Kan,

Shiguang Shan,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zuyan and He, Zhenliang and Kan, Meina and Shan, Shiguang and Chen, Xilin},
  title     = {{UniPercept}: A Unified Diffusion Model for Generalizable Visual Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43175--43186},
}
BrepVGAE: Variational Graph Autoencoder with Unified Latent Representation for B-rep: Hao Guo,

Liyuan Deng,

Yongkang Dai,

Ruohan Wang,

Jiahao Li,

Yunpeng Bai,

Yilei Shi; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hao and Deng, Liyuan and Dai, Yongkang and Wang, Ruohan and Li, Jiahao and Bai, Yunpeng and Shi, Yilei},
  title     = {{BrepVGAE}: Variational Graph Autoencoder with Unified Latent Representation for {B-rep}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39230--39238},
}
DynFusion: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation: Zheng Fang,

Lichuan Xiang,

Xu Cai,

Bing Wang,

Bo Yang,

Hongkai Wen; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zheng and Xiang, Lichuan and Cai, Xu and Wang, Bing and Yang, Bo and Wen, Hongkai},
  title     = {{DynFusion}: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29103--29113},
}
LongVT: Incentivizing "Thinking with Long Videos" via Native Tool Calling: Zuhao Yang,

Sudong Wang,

Kaichen Zhang,

Keming Wu,

Sicong Leng,

Yifan Zhang,

Bo Li,

Chengwei Qin,

Shijian Lu,

Xingxuan Li,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zuhao and Wang, Sudong and Zhang, Kaichen and Wu, Keming and Leng, Sicong and Zhang, Yifan and Li, Bo and Qin, Chengwei and Lu, Shijian and Li, Xingxuan and Bing, Lidong},
  title     = {{LongVT}: Incentivizing ``Thinking with Long Videos'' via Native Tool Calling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33816--33826},
}
Elucidating the SNR-t Bias of Diffusion Probabilistic Models: Meng Yu,

Lei Sun,

Jianhao Zeng,

Xiangxiang Chu,

Kun Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Meng and Sun, Lei and Zeng, Jianhao and Chu, Xiangxiang and Zhan, Kun},
  title     = {Elucidating the {SNR-t} Bias of Diffusion Probabilistic Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43461--43470},
}
ChArtist: Generating Pictorial Charts with Unified Spatial and Subject Control: Shishi Xiao,

Tongyu Zhou,

David H. Laidlaw,

Gromit Yeuk-Yin Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Shishi and Zhou, Tongyu and Laidlaw, David H. and Chan, Gromit Yeuk-Yin},
  title     = {{ChArtist}: Generating Pictorial Charts with Unified Spatial and Subject Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29211--29221},
}
Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation: Yuyang You,

Yongzhi Li,

Jiahui Li,

Yadong Mu,

Quan Chen,

Peng Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Yuyang and Li, Yongzhi and Li, Jiahui and Mu, Yadong and Chen, Quan and Jiang, Peng},
  title     = {Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43429--43439},
}
Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment: Yaze Zhao,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yaze and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41605--41615},
}
Structural Action Transformer for 3D Dexterous Manipulation: Xiaohan Lei,

Min Wang,

Bohong Weng,

Wengang Zhou,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Xiaohan and Wang, Min and Weng, Bohong and Zhou, Wengang and Li, Houqiang},
  title     = {Structural Action Transformer for {3D} Dexterous Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28807--28818},
}
SECOS: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning: Hezhao Liu,

Jiacheng Yang,

Junlong Gao,

Mengke Li,

Yiqun Zhang,

Shreyank N Gowda,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hezhao and Yang, Jiacheng and Gao, Junlong and Li, Mengke and Zhang, Yiqun and Gowda, Shreyank N and Lu, Yang},
  title     = {{SECOS}: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39627--39636},
}
Mechanisms of Object Localization in Vision-Language Models: Timothy Schaumlöffel,

Martina G. Vilas,

Gemma Roig; [pdf] [supp]
[bibtex]
@InProceedings{Schaumloffel_2026_CVPR,
  author    = {Schauml{\"o}ffel, Timothy and Vilas, Martina G. and Roig, Gemma},
  title     = {Mechanisms of Object Localization in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31356--31365},
}
Bridge: Basis-Driven Causal Inference Marries VFMs for Domain Generalization: Mingbo Hong,

Feng Liu,

Caroline Gevaert,

George Vosselman,

Hao Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Mingbo and Liu, Feng and Gevaert, Caroline and Vosselman, George and Cheng, Hao},
  title     = {Bridge: Basis-Driven Causal Inference Marries {VFMs} for Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31962--31973},
}
Video-as-Answer: Predict and Generate Next Video Event with Joint-GRPO: Junhao Cheng,

Liang Hou,

Xin Tao,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Junhao and Hou, Liang and Tao, Xin and Liao, Jing},
  title     = {Video-as-Answer: Predict and Generate Next Video Event with {Joint-GRPO}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38915--38925},
}
Refer-Agent: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation: Haichao Jiang,

Tianming Liang,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haichao and Liang, Tianming and Zheng, Wei-Shi and Hu, Jian-Fang},
  title     = {{Refer-Agent}: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39508--39517},
}
KVSmooth: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing: Siyu Jiang,

Feiyang Chen,

Xiaojin Zhang,

Kun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Siyu and Chen, Feiyang and Zhang, Xiaojin and He, Kun},
  title     = {{KVSmooth}: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32561--32571},
}
PaCo-RL: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling: Bowen Ping,

Chengyou Jia,

Minnan Luo,

Changliang Xia,

Xin Shen,

Zhuohang Dang,

Hangwei Qian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ping_2026_CVPR,
  author    = {Ping, Bowen and Jia, Chengyou and Luo, Minnan and Xia, Changliang and Shen, Xin and Dang, Zhuohang and Qian, Hangwei},
  title     = {{PaCo-RL}: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34353--34363},
}
Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection: Shan Wang,

Maying Shen,

Nadine Chang,

Chuong Nguyen,

Hongdong Li,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shan and Shen, Maying and Chang, Nadine and Nguyen, Chuong and Li, Hongdong and Alvarez, Jose M.},
  title     = {Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32539--32549},
}
INSIGHT Bench: Towards Grounded IN-SItu Guidance for Robotic ManipulaTion: Seonho Kim,

Junhyeong Hong,

Kyungjae Lee,

Yoonseon Oh; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seonho and Hong, Junhyeong and Lee, Kyungjae and Oh, Yoonseon},
  title     = {{INSIGHT Bench}: Towards Grounded {IN-SItu} Guidance for Robotic {ManipulaTion}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35070--35079},
}
FSLoRA: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search: Yanling Tian,

Shanshan Zhang,

Di Chen,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Yanling and Zhang, Shanshan and Chen, Di and Yang, Jian},
  title     = {{FSLoRA}: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40428--40437},
}
Concept-Aware LoRA for Domain-Aligned Segmentation Dataset Generation: Minho Park,

Sunghyun Park,

Jungsoo Lee,

Hyojin Park,

Kyuwoong Hwang,

Fatih Porikli,

Jaegul Choo,

Sungha Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Minho and Park, Sunghyun and Lee, Jungsoo and Park, Hyojin and Hwang, Kyuwoong and Porikli, Fatih and Choo, Jaegul and Choi, Sungha},
  title     = {Concept-Aware {LoRA} for Domain-Aligned Segmentation Dataset Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39858--39868},
}
VISion On Request: Enhanced VLLM efficiency with sparse, dynamically selected, vision-language interactions: Adrian Bulat,

Alberto Baldrati,

Ioannis Maniadis Metaxas,

Yassine Ouali,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bulat_2026_CVPR,
  author    = {Bulat, Adrian and Baldrati, Alberto and Metaxas, Ioannis Maniadis and Ouali, Yassine and Tzimiropoulos, Georgios},
  title     = {{VISion On Request}: Enhanced {VLLM} efficiency with sparse, dynamically selected, vision-language interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31920--31930},
}
HiLoRA: Hierarchical Low-Rank Adaptation for Personalized Federated Learning: Zihao Peng,

Nan Zou,

Jiandian Zeng,

Guo Li,

Ke Chen,

Boyuan Li,

Tian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Zihao and Zou, Nan and Zeng, Jiandian and Li, Guo and Chen, Ke and Li, Boyuan and Wang, Tian},
  title     = {{HiLoRA}: Hierarchical Low-Rank Adaptation for Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31746--31757},
}
TrafficAlign: Aligning Large Language Models for Traffic Scenario Generation: Zhi Tu,

Liangkun Niu,

Tianyi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Zhi and Niu, Liangkun and Zhang, Tianyi},
  title     = {{TrafficAlign}: Aligning Large Language Models for Traffic Scenario Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39744--39754},
}
Reward Sharpness-Aware Fine-Tuning for Diffusion Models: Kwanyoung Kim,

Byeongsu Sim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Kwanyoung and Sim, Byeongsu},
  title     = {Reward Sharpness-Aware Fine-Tuning for Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36051--36061},
}
SGDE: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging: Yuqiao He,

Xiaoyan Liu,

Jianxu Mao,

Yaonan Wang,

Hui Zhang,

Lizhu Liu,

Yurong Chen,

Wenbin He; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuqiao and Liu, Xiaoyan and Mao, Jianxu and Wang, Yaonan and Zhang, Hui and Liu, Lizhu and Chen, Yurong and He, Wenbin},
  title     = {{SGDE}: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34084--34094},
}
MindPower: Enabling Theory-of-Mind Reasoning in VLM-based Embodied Agents: Ruoxuan Zhang,

Qiyun Zheng,

Zhiyu Zhou,

Ziqi Liao,

Siyu Wu,

Jian-Yu Jiang-Lin,

Bin Wen,

Hongxia Xie,

Jianlong Fu,

Wen-Huang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruoxuan and Zheng, Qiyun and Zhou, Zhiyu and Liao, Ziqi and Wu, Siyu and Jiang-Lin, Jian-Yu and Wen, Bin and Xie, Hongxia and Fu, Jianlong and Cheng, Wen-Huang},
  title     = {{MindPower}: Enabling Theory-of-Mind Reasoning in {VLM-based} Embodied Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29631--29641},
}
Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision: Zitang Sun,

Masakazu Yoshimura,

Junji Otsuka,

Atsushi Irie,

Takeshi Ohashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zitang and Yoshimura, Masakazu and Otsuka, Junji and Irie, Atsushi and Ohashi, Takeshi},
  title     = {Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32787--32797},
}
HierAmp: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation: Lin Zhao,

Xinru Jiang,

Xi Xiao,

Qihui Fan,

Lei Lu,

Yanzhi Wang,

Xue Lin,

Octavia Camps,

Pu Zhao,

Jianyang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lin and Jiang, Xinru and Xiao, Xi and Fan, Qihui and Lu, Lei and Wang, Yanzhi and Lin, Xue and Camps, Octavia and Zhao, Pu and Gu, Jianyang},
  title     = {{HierAmp}: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41688--41698},
}
Leveraging Class Distributions in CLIP for Weakly Supervised Semantic Segmentation: Ziqian Yang,

Xinqiao Zhao,

Xiaolei Wang,

Quan Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin},
  title     = {Leveraging Class Distributions in {CLIP} for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34714--34723},
}
Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective: Rui Huang,

Shitong Shao,

Zikai Zhou,

Pukun Zhao,

Hangyu Guo,

Tian Ye,

Lichen Bai,

Shuo Yang,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Rui and Shao, Shitong and Zhou, Zikai and Zhao, Pukun and Guo, Hangyu and Ye, Tian and Bai, Lichen and Yang, Shuo and Xie, Zeke},
  title     = {Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43621--43631},
}
Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection: Le Jiang,

Yan Huang,

Zhen Xu,

Yong Xu,

Hau-San Wong,

Si Wu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Le and Huang, Yan and Xu, Zhen and Xu, Yong and Wong, Hau-San and Wu, Si},
  title     = {Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35607--35616},
}
Neural Mixture Density Processes: Yi Ding,

Qi Tao,

Xingxing Liang,

Longfei Zhang,

Yiqin Lv,

Weitao Song,

Fangjie Yang,

Cheems Wang,

Guangquan Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Yi and Tao, Qi and Liang, Xingxing and Zhang, Longfei and Lv, Yiqin and Song, Weitao and Yang, Fangjie and Wang, Cheems and Cheng, Guangquan},
  title     = {Neural Mixture Density Processes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39680--39690},
}
Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates: Guojun Xu,

Mingyang Zhang,

Jianwen Xiang,

Cheng Tan,

Yanchao Yang,

Junwei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guojun and Zhang, Mingyang and Xiang, Jianwen and Tan, Cheng and Yang, Yanchao and Zhou, Junwei},
  title     = {Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33662--33671},
}
FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing: Yucheng Liao,

Jiajun Liang,

Kaiqian Cui,

Baoquan Zhao,

Haoran Xie,

Wei Liu,

Qing Li,

Xudong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Yucheng and Liang, Jiajun and Cui, Kaiqian and Zhao, Baoquan and Xie, Haoran and Liu, Wei and Li, Qing and Mao, Xudong},
  title     = {{FreqEdit}: Preserving High-Frequency Features for Robust Multi-Turn Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43525--43535},
}
Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning: Jianting Chen,

Dianzhi Yu,

Irwin King; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jianting and Yu, Dianzhi and King, Irwin},
  title     = {Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39945--39961},
}
Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection: Yawen Yang,

Feng Li,

Shuqi Kong,

Yunfeng Diao,

Xinjian Gao,

Zenglin Shi,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yawen and Li, Feng and Kong, Shuqi and Diao, Yunfeng and Gao, Xinjian and Shi, Zenglin and Wang, Meng},
  title     = {Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38111--38121},
}
PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction: Ziqiao Meng,

Qichao Wang,

Zhiyang Dou,

Zixing Song,

Zhipeng Zhou,

Irwin King,

Peilin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Ziqiao and Wang, Qichao and Dou, Zhiyang and Song, Zixing and Zhou, Zhipeng and King, Irwin and Zhao, Peilin},
  title     = {{PointNSP}: Autoregressive {3D} Point Cloud Generation with Next-Scale Level-of-Detail Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31450--31461},
}
RankOOD - Class Ranking-based Out-of-Distribution Detection: Dishanika Denipitiyage,

Naveen Karunanayake,

Suranga Seneviratne,

Sanjay Chawla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Denipitiyage_2026_CVPR,
  author    = {Denipitiyage, Dishanika and Karunanayake, Naveen and Seneviratne, Suranga and Chawla, Sanjay},
  title     = {{RankOOD} -- Class Ranking-based Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42280--42289},
}
SASNet: Spatially-Adaptive Sinusoidal Networks for INRs: Haoan Feng,

Diana Aldana,

Tiago Novello,

Leila De Floriani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Haoan and Aldana, Diana and Novello, Tiago and De Floriani, Leila},
  title     = {{SASNet}: Spatially-Adaptive Sinusoidal Networks for {INRs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41964--41973},
}
IncreFA: Breaking the Static Wall of Generative Model Attribution: Haotian Qin,

Dongliang Chang,

Yueying Gao,

Yuexuan Tan,

Lei Chen,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Haotian and Chang, Dongliang and Gao, Yueying and Tan, Yuexuan and Chen, Lei and Ma, Zhanyu},
  title     = {{IncreFA}: Breaking the Static Wall of Generative Model Attribution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35405--35415},
}
Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation: Han Xue,

Nan Min,

Xiaotong Liu,

Wendi Chen,

Yuan Fang,

Jun Lv,

Cewu Lu,

Chuan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Han and Min, Nan and Liu, Xiaotong and Chen, Wendi and Fang, Yuan and Lv, Jun and Lu, Cewu and Wen, Chuan},
  title     = {Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35059--35069},
}
BAMI: Training-Free Bias Mitigation in GUI Grounding: Borui Zhang,

Bo Zhang,

Bo Wang,

Wenzhao Zheng,

Yuhao Cheng,

Liang Tang,

Yiqiang Yan,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Borui and Zhang, Bo and Wang, Bo and Zheng, Wenzhao and Cheng, Yuhao and Tang, Liang and Yan, Yiqiang and Zhou, Jie and Lu, Jiwen},
  title     = {{BAMI}: Training-Free Bias Mitigation in {GUI} Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34596--34605},
}
Cinematic Audio Source Separation Using Visual Cues: Kang Zhang,

Suyeon Lee,

Arda Senocak,

Joon Son Chung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kang and Lee, Suyeon and Senocak, Arda and Chung, Joon Son},
  title     = {Cinematic Audio Source Separation Using Visual Cues},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37874--37884},
}
GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models: Ning Han,

Zhenyu Ge,

Feng Han,

Yuhua Sun,

Chengqing Li,

Jingjing Chen; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Ning and Ge, Zhenyu and Han, Feng and Sun, Yuhua and Li, Chengqing and Chen, Jingjing},
  title     = {{GrOCE}: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43536--43545},
}
NeAR: Coupled Neural Asset-Renderer Stack: Hong Li,

Chongjie Ye,

Houyuan Chen,

Weiqing Xiao,

Ziyang Yan,

Lixing Xiao,

Zhaoxi Chen,

Jianfeng Xiang,

Shaocong Xu,

Xuhui Liu,

Yikai Wang,

Baochang Zhang,

Xiaoguang Han,

Jiaolong Yang,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hong and Ye, Chongjie and Chen, Houyuan and Xiao, Weiqing and Yan, Ziyang and Xiao, Lixing and Chen, Zhaoxi and Xiang, Jianfeng and Xu, Shaocong and Liu, Xuhui and Wang, Yikai and Zhang, Baochang and Han, Xiaoguang and Yang, Jiaolong and Zhao, Hao},
  title     = {{NeAR}: Coupled Neural Asset-Renderer Stack},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29834--29844},
}
ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars: Kaiwen Song,

Jinkai Cui,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Kaiwen and Cui, Jinkai and Zhang, Juyong},
  title     = {{ProgressiveAvatars}: Progressive Animatable {3D} {Gaussian} Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32518--32527}
}
Scene Grounding in the Wild: Tamir Cohen,

Leo Segre,

Shay Shomer-Chai,

Shai Avidan,

Hadar Averbuch-Elor; [pdf] [supp]
[bibtex]
@InProceedings{Cohen_2026_CVPR,
  author    = {Cohen, Tamir and Segre, Leo and Shomer-Chai, Shay and Avidan, Shai and Averbuch-Elor, Hadar},
  title     = {Scene Grounding in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33353--33363}
}
All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models: Xinyu Tian,

Shu Zou,

Zhaoyuan Yang,

Mengqi He,

Peter Tu,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and He, Mengqi and Tu, Peter and Zhang, Jing},
  title     = {All Roads Lead to {Rome}: Incentivizing Divergent Thinking in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33488--33498}
}
MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents: Yijun Sheng,

Shipeng Zhu,

Ruijia Zuo,

Na Nie,

Hui Xue; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Yijun and Zhu, Shipeng and Zuo, Ruijia and Nie, Na and Xue, Hui},
  title     = {{MCHDoc}: A Comprehensive Benchmark for Reading Multi-Carrier {Chinese} Historical Documents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38722--38731}
}
GM-R^2: Generative Matching Learning for Unsupervised Geometric Representation and Registration: Haobo Jiang,

Liang Yu,

Jianmin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haobo and Yu, Liang and Zheng, Jianmin},
  title     = {{GM-R$^{2}$}: Generative Matching Learning for Unsupervised Geometric Representation and Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31430--31439}
}
Low-Rank Residual Diffusion Models: Junfu Tan,

Jiang Yuan; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Junfu and Yuan, Jiang},
  title     = {Low-Rank Residual Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35747--35757}
}
Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images: Matias Turkulainen,

Akshay Krishnan,

Filippo Aleotti,

Mohamed Sayed,

Guillermo Garcia-Hernando,

Juho Kannala,

Arno Solin,

Gabriel Brostow,

Daniyar Turmukhambetov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Turkulainen_2026_CVPR,
  author    = {Turkulainen, Matias and Krishnan, Akshay and Aleotti, Filippo and Sayed, Mohamed and Garcia-Hernando, Guillermo and Kannala, Juho and Solin, Arno and Brostow, Gabriel and Turmukhambetov, Daniyar},
  title     = {Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40959--40970}
}
Chain of Event-Centric Causal Thought for Physically Plausible Video Generation: Zixuan Wang,

Yixin Hu,

Haolan Wang,

Feng Chen,

Yan Liu,

Wen Li,

Yinjie Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zixuan and Hu, Yixin and Wang, Haolan and Chen, Feng and Liu, Yan and Li, Wen and Lei, Yinjie},
  title     = {Chain of Event-Centric Causal Thought for Physically Plausible Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38122--38131}
}
SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting: Wenjie Chang,

Tianle Ding,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Wenjie and Ding, Tianle and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{SunFaded}: Illumination-Aware {Gaussian} Splatting for Dark Scenes with Camera-Mounted Active Lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40876--40885}
}
MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning: Junha Song,

Yongsik Jo,

So Yeon Min,

Quanting Xie,

Taehwan Kim,

Yonatan Bisk,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Junha and Jo, Yongsik and Min, So Yeon and Xie, Quanting and Kim, Taehwan and Bisk, Yonatan and Choo, Jaegul},
  title     = {{MM-SeR}: Multimodal Self-Refinement for Lightweight Image Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30929--30940}
}
Thermally Activated Dual-Modal Adversarial Clothing against AI Surveillance Systems: Jiahuan Long,

Tingsong Jiang,

Hanqing Liu,

Chao Ma,

Weien Zhou,

Yang Yang,

Wen Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Jiahuan and Jiang, Tingsong and Liu, Hanqing and Ma, Chao and Zhou, Weien and Yang, Yang and Yao, Wen},
  title     = {Thermally Activated Dual-Modal Adversarial Clothing against {AI} Surveillance Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34929--34939}
}
Zero-shot Detection of AI-Generated Image via RAW-RGB Alignment: Haiwei Wu,

Fengpeng Li,

Zhilin Tu,

Yuanman Li,

Xiong Li,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haiwei and Li, Fengpeng and Tu, Zhilin and Li, Yuanman and Li, Xiong and Zhou, Jiantao},
  title     = {Zero-shot Detection of {AI}-Generated Image via {RAW-RGB} Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42997--43007}
}
Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection: Xiaolei Wang,

Yuexin Wang,

Tianhong Dai,

Huihui Bai,

Yao Zhao,

Jimin Xiao; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaolei and Wang, Yuexin and Dai, Tianhong and Bai, Huihui and Zhao, Yao and Xiao, Jimin},
  title     = {Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43093--43102}
}
MedKCO: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration: Chenran Zhang,

Ruiqi Wu,

Tao Zhou,

Yi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chenran and Wu, Ruiqi and Zhou, Tao and Zhou, Yi},
  title     = {{MedKCO}: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35260--35269}
}
BIT: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification: Haoxuan Xu,

Guanglin Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Haoxuan and Niu, Guanglin},
  title     = {{BIT}: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40386--40396}
}
TeamHOI: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size: Stefan Lionar,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lionar_2026_CVPR,
  author    = {Lionar, Stefan and Lee, Gim Hee},
  title     = {{TeamHOI}: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37121--37132}
}
Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening: Zhuwei Wen,

Zimin Xia,

He Chen,

Linwei Yue,

Xianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Zhuwei and Xia, Zimin and Chen, He and Yue, Linwei and Zheng, Xianwei},
  title     = {Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34832--34842}
}
SAQN: Semantic-based Adaptive Query Network for 3D Referring Expression Segmentation: Jiale Huang,

Shangfei Wang; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiale and Wang, Shangfei},
  title     = {{SAQN}: Semantic-based Adaptive Query Network for {3D} Referring Expression Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38627--38636}
}
Making the Classification Explanation Faithful to the Confidence Score: Jian-Xun Mi,

Lu Pan,

Weisheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2026_CVPR,
  author    = {Mi, Jian-Xun and Pan, Lu and Li, Weisheng},
  title     = {Making the Classification Explanation Faithful to the Confidence Score},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38959--38968}
}
Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models: Vishal Pramanik,

Maisha Maliha,

Susmit Jha,

Alvaro Velasquez,

Olivera Kotevska,

Sumit Kumar Jha; [pdf] [supp]
[bibtex]
@InProceedings{Pramanik_2026_CVPR,
  author    = {Pramanik, Vishal and Maliha, Maisha and Jha, Susmit and Velasquez, Alvaro and Kotevska, Olivera and Jha, Sumit Kumar},
  title     = {Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31662--31671}
}
VT-Intrinsic: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair: Zeqing Yuan,

Mani Ramanagopal,

Aswin C. Sankaranarayanan,

Srinivasa G. Narasimhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Zeqing and Ramanagopal, Mani and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.},
  title     = {{VT-Intrinsic}: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41752--41761}
}
Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical 4D World: Yuzhi Huang,

Kairun Wen,

Rongxin Gao,

Dongxuan Liu,

Yibin Lou,

Jie Wu,

Jing Xu,

Jian Zhang,

Zheng Yang,

Yunlong Lin,

Chenxin Li,

Panwang Pan,

Junbin Lu,

Jingyan Jiang,

Xinghao Ding,

Yue Huang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuzhi and Wen, Kairun and Gao, Rongxin and Liu, Dongxuan and Lou, Yibin and Wu, Jie and Xu, Jing and Zhang, Jian and Yang, Zheng and Lin, Yunlong and Li, Chenxin and Pan, Panwang and Lu, Junbin and Jiang, Jingyan and Ding, Xinghao and Huang, Yue and Wang, Zhi},
  title     = {Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical {4D} World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33446--33456}
}
PHAC: Promptable Human Amodal Completion: Seung Young Noh,

Ju Yong Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noh_2026_CVPR,
  author    = {Noh, Seung Young and Chang, Ju Yong},
  title     = {{PHAC}: Promptable Human Amodal Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30750--30760}
}
Outlier-Robust Diffusion Solvers for Inverse Problems: Yang Zheng,

Jiahua Liu,

Tongyao Pang,

Wen Li,

Zhaoqiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yang and Liu, Jiahua and Pang, Tongyao and Li, Wen and Liu, Zhaoqiang},
  title     = {Outlier-Robust Diffusion Solvers for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30782--30791}
}
C^2FG: Control Classifier-Free Guidance via Score Discrepancy Analysis: Jiayang Gao,

Tianyi Zheng,

Jiayang Zou,

Fengxiang Yang,

Shice Liu,

Luyao Fan,

Zheyu Zhang,

Hao Zhang,

Jinwei Chen,

Peng-Tao Jiang,

Bo Li,

Jia Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jiayang and Zheng, Tianyi and Zou, Jiayang and Yang, Fengxiang and Liu, Shice and Fan, Luyao and Zhang, Zheyu and Zhang, Hao and Chen, Jinwei and Jiang, Peng-Tao and Li, Bo and Wang, Jia},
  title     = {{C$^{2}$FG}: Control Classifier-Free Guidance via Score Discrepancy Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34398--34407}
}
Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis: M. Kerem Aydin,

Vishwanath Saragadam,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aydin_2026_CVPR,
  author    = {Aydin, M. Kerem and Saragadam, Vishwanath and Alexander, Emma},
  title     = {Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29845--29854}
}
RHO: Robust Holistic OSM-Based Metric Cross-View Geo-Localization: Junwei Zheng,

Ruize Dai,

Ruiping Liu,

Zichao Zeng,

Yufan Chen,

Fangjinhua Wang,

Kunyu Peng,

Kailun Yang,

Jiaming Zhang,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Junwei and Dai, Ruize and Liu, Ruiping and Zeng, Zichao and Chen, Yufan and Wang, Fangjinhua and Peng, Kunyu and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer},
  title     = {{RHO}: Robust Holistic {OSM}-Based Metric Cross-View Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33727--33737}
}
Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models: Hayeon Kim,

Ji Ha Jang,

Junghun James Kim,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hayeon and Jang, Ji Ha and Kim, Junghun James and Chun, Se Young},
  title     = {Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36861--36870}
}
Self-Critical Distillation Network for Video-based Commonsense Captioning: Mengqi Yuan,

Gengyun Jia,

Bing-Kun Bao; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Mengqi and Jia, Gengyun and Bao, Bing-Kun},
  title     = {Self-Critical Distillation Network for Video-based Commonsense Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40527--40536}
}
Inter-Edit: First Benchmark for Interactive Instruction-Based Image Editing: Delong Liu,

Haotian Hou,

Zhaohui Hou,

Zhiyuan Huang,

Shihao Han,

Mingjie Zhan,

Zhicheng Zhao,

Fei Su; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Delong and Hou, Haotian and Hou, Zhaohui and Huang, Zhiyuan and Han, Shihao and Zhan, Mingjie and Zhao, Zhicheng and Su, Fei},
  title     = {{Inter-Edit}: First Benchmark for Interactive Instruction-Based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37290--37300}
}
Guiding Diffusion Models with Semantically Degraded Conditions: Shilong Han,

Yuming Zhang,

Hongxia Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Shilong and Zhang, Yuming and Wang, Hongxia},
  title     = {Guiding Diffusion Models with Semantically Degraded Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43653--43663}
}
Meta-CoT: Enhancing Granularity and Generalization in Image Editing: Shiyi Zhang,

Yiji Cheng,

Tiankai Hang,

Zijin Yin,

Runze He,

Yu Xu,

Wenxun Dai,

Yunlong Lin,

Chunyu Wang,

Qinglin Lu,

Yansong Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shiyi and Cheng, Yiji and Hang, Tiankai and Yin, Zijin and He, Runze and Xu, Yu and Dai, Wenxun and Lin, Yunlong and Wang, Chunyu and Lu, Qinglin and Tang, Yansong},
  title     = {{Meta-CoT}: Enhancing Granularity and Generalization in Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38004--38015}
}
AutoCut: End-to-end advertisement video editing based on multimodal discretization and controllable generation: Milton Zhou,

Sizhong Qin,

Yongzhi Li,

Quan Chen,

Peng Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Milton and Qin, Sizhong and Li, Yongzhi and Chen, Quan and Jiang, Peng},
  title     = {{AutoCut}: End-to-end advertisement video editing based on multimodal discretization and controllable generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37777--37787}
}
TempoControl: Temporal Attention Guidance for Text-to-Video Models: Shira Schiber,

Ofir Lindenbaum,

Idan Schwartz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schiber_2026_CVPR,
  author    = {Schiber, Shira and Lindenbaum, Ofir and Schwartz, Idan},
  title     = {{TempoControl}: Temporal Attention Guidance for Text-to-Video Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36670--36679}
}
PanoVGGT: Feed-Forward 3D Reconstruction from Panoramic Imagery: Yijing Guo,

Mengjun Chao,

Luo Wang,

Tianyang Zhao,

Haizhao Dai,

Yingliang Zhang,

Jingyi Yu,

Yujiao Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yijing and Chao, Mengjun and Wang, Luo and Zhao, Tianyang and Dai, Haizhao and Zhang, Yingliang and Yu, Jingyi and Shi, Yujiao},
  title     = {{PanoVGGT}: Feed-Forward {3D} Reconstruction from Panoramic Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36444--36453}
}
NavForesee: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction: Fei Liu,

Shichao Xie,

Minghua Luo,

Zedong Chu,

Junjun Hu,

Xiaolong Wu,

Mu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Fei and Xie, Shichao and Luo, Minghua and Chu, Zedong and Hu, Junjun and Wu, Xiaolong and Xu, Mu},
  title     = {{NavForesee}: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32431--32440}
}
RealAppliance: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals: Yuzheng Gao,

Yuxing Long,

Lei Kang,

Yuchong Guo,

Ziyan Yu,

Shangqing Mao,

Jiyao Zhang,

Ruihai Wu,

Dongjiang Li,

Hui Shen,

Hao Dong; [pdf]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuzheng and Long, Yuxing and Kang, Lei and Guo, Yuchong and Yu, Ziyan and Mao, Shangqing and Zhang, Jiyao and Wu, Ruihai and Li, Dongjiang and Shen, Hui and Dong, Hao},
  title     = {{RealAppliance}: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37186--37194}
}
Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty: Yan Li,

Yuzhu Shi,

Kan Zhou,

Shu Zhang,

Diqi He,

Dingwen Zhang,

Junwei Han; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Shi, Yuzhu and Zhou, Kan and Zhang, Shu and He, Diqi and Zhang, Dingwen and Han, Junwei},
  title     = {Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32334--32344}
}
FINE: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models: Yucheng Xie,

Fu Feng,

Ruixiao Shi,

Jianlu Shen,

Jing Wang,

Yong Rui,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yucheng and Feng, Fu and Shi, Ruixiao and Shen, Jianlu and Wang, Jing and Rui, Yong and Geng, Xin},
  title     = {{FINE}: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42018--42028}
}
OctoNav: Towards Generalist Embodied Navigation: Chen Gao,

Liankai Jin,

Xingyu Peng,

Jiazhao Zhang,

Yue Deng,

Annan Li,

He Wang,

Si Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Chen and Jin, Liankai and Peng, Xingyu and Zhang, Jiazhao and Deng, Yue and Li, Annan and Wang, He and Liu, Si},
  title     = {{OctoNav}: Towards Generalist Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40074--40084}
}
Semantic Alignment for Pose-Invariant Identity Preserving Diffusion: Jiwon Kim,

SeonHwa Kim,

Soobin Park,

Eunju Cha,

Kyong Hwan Jin; [pdf]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jiwon and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan},
  title     = {Semantic Alignment for Pose-Invariant Identity Preserving Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43363--43372}
}
Minerva-Ego: Spatiotemporal Hints for Egocentric Video Understanding: Arsha Nagrani,

Jasper Uijlings,

Shyamal Buch,

Tobias Weyand,

Sudheendra Vijayanarasimhan,

Bo Hu,

Ramin Mehran,

David A Ross,

Cordelia Schmid; [pdf] [supp]
[bibtex]
@InProceedings{Nagrani_2026_CVPR,
  author    = {Nagrani, Arsha and Uijlings, Jasper and Buch, Shyamal and Weyand, Tobias and Vijayanarasimhan, Sudheendra and Hu, Bo and Mehran, Ramin and Ross, David A. and Schmid, Cordelia},
  title     = {{Minerva-Ego}: Spatiotemporal Hints for Egocentric Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38859--38869}
}
CaReFlow: Cyclic Adaptive Rectified Flow for Multimodal Fusion: Sijie Mai,

Shiqin Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Sijie and Han, Shiqin},
  title     = {{CaReFlow}: Cyclic Adaptive Rectified Flow for Multimodal Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37799--37809}
}
RAP: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient 3D Gaussian Splatting Processing: Kaifa Yang,

Qi Yang,

Yiling Xu,

Zhu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kaifa and Yang, Qi and Xu, Yiling and Li, Zhu},
  title     = {{RAP}: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient {3D} {Gaussian} Splatting Processing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33323--33332}
}
Sketch2CT: Multimodal Diffusion for Structure-Aware 3D Medical Volume Generation: Delin An,

Chaoli Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Delin and Wang, Chaoli},
  title     = {{Sketch2CT}: Multimodal Diffusion for Structure-Aware {3D} Medical Volume Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37600--37610}
}
Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization: Jiaze Xu,

Shiyu Xia,

Jiaqi Lv,

Xin Geng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiaze and Xia, Shiyu and Lv, Jiaqi and Geng, Xin},
  title     = {Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34502--34511}
}
UniChange: Unifying Change Detection with Multimodal Large Language Model: Xu Zhang,

Danyang Li,

Xiaohang Dong,

Tianhao Wu,

Hualong Yu,

Jianye Wang,

Qicheng Li,

Xiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xu and Li, Danyang and Dong, Xiaohang and Wu, Tianhao and Yu, Hualong and Wang, Jianye and Li, Qicheng and Li, Xiang},
  title     = {{UniChange}: Unifying Change Detection with Multimodal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42169--42179}
}
ChartR: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering: Xiaojun Chen,

Sixiao Luo,

Ziqi Liu,

Min Yang,

Qin Zhang,

Liang-Jie Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiaojun and Luo, Sixiao and Liu, Ziqi and Yang, Min and Zhang, Qin and Zhang, Liang-Jie},
  title     = {{ChartR}: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41193--41202}
}
Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models: Xingyu Qiu,

Mengying Yang,

Xinghua Ma,

Dong Liang,

Fanding Li,

Gongning Luo,

Wei Wang,

Kuanquan Wang,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Liang, Dong and Li, Fanding and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo},
  title     = {Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30833--30842}
}
VIVA: VLM-Guided Instruction-Based Video Editing with Reward Optimization: Xiaoyan Cong,

Haotian Yang,

Angtian Wang,

Yizhi Wang,

Yiding Yang,

Canyu Zhang,

Chongyang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2026_CVPR,
  author    = {Cong, Xiaoyan and Yang, Haotian and Wang, Angtian and Wang, Yizhi and Yang, Yiding and Zhang, Canyu and Ma, Chongyang},
  title     = {{VIVA}: {VLM}-Guided Instruction-Based Video Editing with Reward Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34364--34374}
}
Adaptive 3D Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning: Shenghai Yuan,

Wei Yihan,

Jason Yee,

Zhuoran Qiao,

Boyang Lou,

Enwen Hu; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Shenghai and Yihan, Wei and Yee, Jason and Qiao, Zhuoran and Lou, Boyang and Hu, Enwen},
  title     = {Adaptive {3D} Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39064--39074}
}
FluxMem: Adaptive Hierarchical Memory for Streaming Video Understanding: Yiweng Xie,

Bo He,

Junke Wang,

Xiangyu Zheng,

Ziyi Ye,

Zuxuan Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yiweng and He, Bo and Wang, Junke and Zheng, Xiangyu and Ye, Ziyi and Wu, Zuxuan},
  title     = {{FluxMem}: Adaptive Hierarchical Memory for Streaming Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31272--31282}
}
Adapting In-context Generation for Enhanced Composed Image Retrieval: Haiwen Li,

Zining Chen,

Delong Liu,

Zhaohui Hou,

Zhicheng Zhao,

Fei Su; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haiwen and Chen, Zining and Liu, Delong and Hou, Zhaohui and Zhao, Zhicheng and Su, Fei},
  title     = {Adapting In-context Generation for Enhanced Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29167--29177}
}
Focus on Background: Exploring SAM's Potential in Few-shot Medical Image Segmentation with Background-centric Prompting: Yuntian Bo,

Yazhou Zhu,

Piotr Koniusz,

Haofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Yuntian and Zhu, Yazhou and Koniusz, Piotr and Zhang, Haofeng},
  title     = {Focus on Background: Exploring {SAM}'s Potential in Few-shot Medical Image Segmentation with Background-centric Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30032--30041}
}
CoD: A Diffusion Foundation Model for Image Compression: Zhaoyang Jia,

Zihan Zheng,

Naifu Xue,

Jiahao Li,

Bin Li,

Zongyu Guo,

Xiaoyi Zhang,

Houqiang Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Zhaoyang and Zheng, Zihan and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Li, Houqiang and Lu, Yan},
  title     = {{CoD}: A Diffusion Foundation Model for Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38420--38429}
}
CRFT: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration: Xuecong Liu,

Mengzhu Ding,

Zixuan Sun,

Zhang Li,

Xichao Teng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Xuecong and Ding, Mengzhu and Sun, Zixuan and Li, Zhang and Teng, Xichao},
    title     = {{CRFT}: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34784--34794}
}
Hierarchical Visual Relocalization with Nearest View Synthesis from Feature Gaussian Splatting: Huaqi Tao,

Bingxi Liu,

Guangcheng Chen,

Fulin Tang,

Li He,

Hong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
    author    = {Tao, Huaqi and Liu, Bingxi and Chen, Guangcheng and Tang, Fulin and He, Li and Zhang, Hong},
    title     = {Hierarchical Visual Relocalization with Nearest View Synthesis from Feature {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40981--40991}
}
Unleashing Vision-Language Semantics for Deepfake Video Detection: Jiawen Zhu,

Yunqi Miao,

Xueyi Zhang,

Jiankang Deng,

Guansong Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Jiawen and Miao, Yunqi and Zhang, Xueyi and Deng, Jiankang and Pang, Guansong},
    title     = {Unleashing Vision-Language Semantics for Deepfake Video Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42953--42963}
}
Uni-Hema: Unified Model for Digital Hematopathology: Abdul Rehman,

Iqra Rasool,

Ayisha Imran,

Mohsen Ali,

Waqas Sultani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rehman_2026_CVPR,
    author    = {Rehman, Abdul and Rasool, Iqra and Imran, Ayisha and Ali, Mohsen and Sultani, Waqas},
    title     = {{Uni-Hema}: Unified Model for Digital Hematopathology},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37578--37589}
}
Breaking the Continuum: Discrete Distribution Learning for Structural MRI Reconstruction: Tianle Lyu,

Mengjingcheng Mo,

Ting Wen,

Zhen Song,

Zinan Xiong,

Yanjie Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
    author    = {Lyu, Tianle and Mo, Mengjingcheng and Wen, Ting and Song, Zhen and Xiong, Zinan and Zhu, Yanjie},
    title     = {Breaking the Continuum: Discrete Distribution Learning for Structural {MRI} Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37568--37577}
}
SpaceDrive: Infusing Spatial Awareness into VLM-based Autonomous Driving: Peizheng Li,

Zhenghao Zhang,

David Holtz,

Hang Yu,

Yutong Yang,

Yuzhi Lai,

Rui Song,

Andreas Geiger,

Andreas Zell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Peizheng and Zhang, Zhenghao and Holtz, David and Yu, Hang and Yang, Yutong and Lai, Yuzhi and Song, Rui and Geiger, Andreas and Zell, Andreas},
    title     = {{SpaceDrive}: Infusing Spatial Awareness into {VLM}-based Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40096--40107}
}
CodeMMR: Bridging Natural Language, Code, and Image for Unified Retrieval: Jiahui Geng,

Qing Li,

Fengyu Cai,

Fakhri Karray; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2026_CVPR,
    author    = {Geng, Jiahui and Li, Qing and Cai, Fengyu and Karray, Fakhri},
    title     = {{CodeMMR}: Bridging Natural Language, Code, and Image for Unified Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38742--38752}
}
RemedyGS: Defend 3D Gaussian Splatting Against Computation Cost Attacks: Yanping Li,

Zhening Liu,

Zijian Li,

Zehong Lin,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yanping and Liu, Zhening and Li, Zijian and Lin, Zehong and Zhang, Jun},
    title     = {{RemedyGS}: Defend {3D} {Gaussian} Splatting Against Computation Cost Attacks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33227--33236}
}
Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking: Kaiyang Lan,

Ying Cui,

Chenchen Jing,

Jianwei Zheng,

Dongyan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Lan_2026_CVPR,
    author    = {Lan, Kaiyang and Cui, Ying and Jing, Chenchen and Zheng, Jianwei and Guo, Dongyan},
    title     = {Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42604--42614}
}
Benchmarking Endoscopic Surgical Image Restoration and Beyond: Jialun Pei,

Diandian Guo,

Donghui Yang,

Zhixi Li,

Yuxin Feng,

Long Ma,

Bo Du,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2026_CVPR,
    author    = {Pei, Jialun and Guo, Diandian and Yang, Donghui and Li, Zhixi and Feng, Yuxin and Ma, Long and Du, Bo and Heng, Pheng-Ann},
    title     = {Benchmarking Endoscopic Surgical Image Restoration and Beyond},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37413--37422}
}
EagleVision: A Dual-Stage Framework with BEV-grounding-based Chain-of-Thought for Spatial Intelligence: Jiaxu Wan,

Xu Wang,

Mengwei Xie,

Hang Zhang,

Mu Xu,

Yang Han,

Ding Yuan,

Hong Zhang,

Yifan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
    author    = {Wan, Jiaxu and Wang, Xu and Xie, Mengwei and Zhang, Hang and Xu, Mu and Han, Yang and Yuan, Ding and Zhang, Hong and Yang, Yifan},
    title     = {{EagleVision}: A Dual-Stage Framework with {BEV}-grounding-based Chain-of-Thought for Spatial Intelligence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38637--38646}
}
StableMTL: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets: Anh-Quan Cao,

Ivan Lopes,

Raoul de Charette; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
    author    = {Cao, Anh-Quan and Lopes, Ivan and de Charette, Raoul},
    title     = {{StableMTL}: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37788--37798}
}
VideoChat-M1: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning: Boyu Chen,

Zikang Wang,

Zhengrong Yue,

Kainan Yan,

Chenyun Yu,

Yi Huang,

Zijun Liu,

Yafei Wen,

Xiaoxin Chen,

Yang Liu,

Peng Li,

Yali Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Boyu and Wang, Zikang and Yue, Zhengrong and Yan, Kainan and Yu, Chenyun and Huang, Yi and Liu, Zijun and Wen, Yafei and Chen, Xiaoxin and Liu, Yang and Li, Peng and Wang, Yali},
    title     = {{VideoChat-M1}: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33772--33783}
}
ArchSym: Detecting 3D-Grounded Architectural Symmetries in the Wild: Hanyu Chen,

Ruojin Cai,

Steve Marschner,

Noah Snavely; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Hanyu and Cai, Ruojin and Marschner, Steve and Snavely, Noah},
    title     = {{ArchSym}: Detecting {3D}-Grounded Architectural Symmetries in the Wild},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36561--36570}
}
Teaching DINOv3 About Partial 3D Geometry: A Self-Supervised Geometry-Aware Approach: Viktoria Ehm,

Dongliang Cao,

Riccardo Marin,

Daniel Scholz,

Weikang Wang,

Florian Bernard,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Ehm_2026_CVPR,
    author    = {Ehm, Viktoria and Cao, Dongliang and Marin, Riccardo and Scholz, Daniel and Wang, Weikang and Bernard, Florian and Cremers, Daniel},
    title     = {Teaching {DINOv3} About Partial {3D} Geometry: A Self-Supervised Geometry-Aware Approach},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42071--42081}
}
Multi-modal Frequency Decomposition Network for Semantic Scene Completion: Die Zuo,

Lubo Wang,

Ruonan Liu,

Qing Guo,

Chong Wang,

Dongdong Wu,

Wei Feng,

Kairui Yang,

Di Lin; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
    author    = {Zuo, Die and Wang, Lubo and Liu, Ruonan and Guo, Qing and Wang, Chong and Wu, Dongdong and Feng, Wei and Yang, Kairui and Lin, Di},
    title     = {Multi-modal Frequency Decomposition Network for Semantic Scene Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41531--41540}
}
Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection: Shule Yan,

Zetian Zhang,

Xiao Ma,

Zexuan Ji; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Shule and Zhang, Zetian and Ma, Xiao and Ji, Zexuan},
    title     = {Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32714--32723}
}
SD-FSMIS: Adapting Stable Diffusion for Few-Shot Medical Image Segmentation: Meihua Li,

Yang Zhang,

Weizhao He,

Hu Qu,

Yisong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Meihua and Zhang, Yang and He, Weizhao and Qu, Hu and Li, Yisong},
    title     = {{SD-FSMIS}: Adapting {Stable Diffusion} for Few-Shot Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29979--29989}
}
The Invisible Gorilla Effect in Out-of-distribution Detection: Harry Anthony,

Ziyun Liang,

Hermione Warr,

Konstantinos Kamnitsas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anthony_2026_CVPR,
    author    = {Anthony, Harry and Liang, Ziyun and Warr, Hermione and Kamnitsas, Konstantinos},
    title     = {The Invisible Gorilla Effect in Out-of-distribution Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39314--39325}
}
Learning by Analogy: A Causal Framework for Compositional Generalization: Lingjing Kong,

Shaoan Xie,

Yang Jiao,

Yetian Chen,

Yanhui Guo,

Simone Shao,

Yan Gao,

Guangyi Chen,

Kun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
    author    = {Kong, Lingjing and Xie, Shaoan and Jiao, Yang and Chen, Yetian and Guo, Yanhui and Shao, Simone and Gao, Yan and Chen, Guangyi and Zhang, Kun},
    title     = {Learning by Analogy: A Causal Framework for Compositional Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36615--36626}
}
UniPart: Part-Level 3D Generation with Unified 3D Geom-Seg Latents: Xufan He,

Yushuang Wu,

Xiaoyang Guo,

Chongjie Ye,

Jiaqing Zhou,

Tianlei Hu,

Xiaoguang Han,

Dong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Xufan and Wu, Yushuang and Guo, Xiaoyang and Ye, Chongjie and Zhou, Jiaqing and Hu, Tianlei and Han, Xiaoguang and Du, Dong},
    title     = {{UniPart}: Part-Level {3D} Generation with Unified {3D} Geom-Seg Latents},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34227--34236}
}
Language-Guided One-Step Diffusion Model for Nighttime Flare Removal: Aoxiang Ning,

Kailong Yu,

Minglong Xue,

Liyuan Pan,

Jinhong He,

Wenchao Yan,

Mingliang Zhou,

Yirui Wu; [pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
    author    = {Ning, Aoxiang and Yu, Kailong and Xue, Minglong and Pan, Liyuan and He, Jinhong and Yan, Wenchao and Zhou, Mingliang and Wu, Yirui},
    title     = {Language-Guided One-Step Diffusion Model for Nighttime Flare Removal},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38442--38452}
}
Instance-level Visual Active Tracking with Occlusion-Aware Planning: Haowei Sun,

Kai Zhou,

Hao Gao,

Shiteng Zhang,

Jinwu Hu,

Xutao Wen,

Qixiang Ye,

Mingkui Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Haowei and Zhou, Kai and Gao, Hao and Zhang, Shiteng and Hu, Jinwu and Wen, Xutao and Ye, Qixiang and Tan, Mingkui},
    title     = {Instance-level Visual Active Tracking with Occlusion-Aware Planning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42494--42504}
}
GeoDiff4D: Geometry-Aware Diffusion for 4D Head Avatar Reconstruction: Chao Xu,

Xiaochen Zhao,

Xiang Deng,

Jingxiang Sun,

Donglin Di,

Zhuo Su,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Chao and Zhao, Xiaochen and Deng, Xiang and Sun, Jingxiang and Di, Donglin and Su, Zhuo and Liu, Yebin},
    title     = {{GeoDiff4D}: Geometry-Aware Diffusion for {4D} Head Avatar Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32485--32495}
}
LLaVAShield: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models: Guolei Huang,

Qinzhi Peng,

Gan Xu,

Yao Huang,

Yuxuan Lu,

Yongjun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Guolei and Peng, Qinzhi and Xu, Gan and Huang, Yao and Lu, Yuxuan and Shen, Yongjun},
    title     = {{LLaVAShield}: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30130--30140}
}
Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal LLMs: Shiang-Feng Tsai,

Yuan-Hong Liao,

Jin-Cheng Jhang,

Nan Qiao,

Min Sun; [pdf] [supp]
[bibtex]
@InProceedings{Tsai_2026_CVPR,
    author    = {Tsai, Shiang-Feng and Liao, Yuan-Hong and Jhang, Jin-Cheng and Qiao, Nan and Sun, Min},
    title     = {Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal {LLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33922--33932}
}
Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory: Ce Zhang,

Jinxi He,

Junyi He,

Katia Sycara,

Yaqi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Ce and He, Jinxi and He, Junyi and Sycara, Katia and Xie, Yaqi},
    title     = {Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41182--41192}
}
PETAR: Localized Findings Generation with Mask-Aware Vision-Language Modeling for PET Automated Reporting: Danyal Maqbool,

Changhee Lee,

Zachary Huemann,

Samuel D. Church,

Matthew E. Larson,

Scott B. Perlman,

Tomas A. Romero,

Joshua D. Warner,

Meghan Lubner,

Xin Tie,

Jameson Merkow,

Junjie Hu,

Steve Y. Cho,

Tyler J. Bradshaw; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maqbool_2026_CVPR,
    author    = {Maqbool, Danyal and Lee, Changhee and Huemann, Zachary and Church, Samuel D. and Larson, Matthew E. and Perlman, Scott B. and Romero, Tomas A. and Warner, Joshua D. and Lubner, Meghan and Tie, Xin and Merkow, Jameson and Hu, Junjie and Cho, Steve Y. and Bradshaw, Tyler J.},
    title     = {{PETAR}: Localized Findings Generation with Mask-Aware Vision-Language Modeling for {PET} Automated Reporting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42637--42648}
}
Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training: Haiwei Wu,

Kemou Li,

Yuanman Li,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Haiwei and Li, Kemou and Li, Yuanman and Zhou, Jiantao},
    title     = {Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35483--35493}
}
When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence: Mingrui Zhu,

Fengzhi Wang,

Xin Wei,

Jun Wang,

Nannan Wang,

Xinbo Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Mingrui and Wang, Fengzhi and Wei, Xin and Wang, Jun and Wang, Nannan and Gao, Xinbo},
    title     = {When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37715--37724}
}
Routing on Demand: DSNet for Efficient Progressive Point Cloud Denoising: Xiaoqian Cheng,

Dong Xiao,

Husen Li,

Zheng Liu,

Renjie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Xiaoqian and Xiao, Dong and Li, Husen and Liu, Zheng and Chen, Renjie},
    title     = {Routing on Demand: {DSNet} for Efficient Progressive Point Cloud Denoising},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39111--39120}
}
ElasticFormer: Detecting Objects in HRW Shots via Elastic Computing Vision Transformer: Wenxi Li,

Jingchen Huang,

Chenyang Lyu,

Moran Liu,

Haozhe Lin,

Guiguang Ding,

Yuchen Guo; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Wenxi and Huang, Jingchen and Lyu, Chenyang and Liu, Moran and Lin, Haozhe and Ding, Guiguang and Guo, Yuchen},
    title     = {{ElasticFormer}: Detecting Objects in {HRW} Shots via Elastic Computing Vision Transformer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32735--32744}
}
Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning: Apoorv Vyas,

Heng-Jui Chang,

Cheng-Fu Yang,

Po-Yao Huang,

Luya Gao,

Julius Richter,

Sanyuan Chen,

Matthew Le,

Piotr Dollár,

Christoph Feichtenhofer,

Ann Lee,

Wei-Ning Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vyas_2026_CVPR,
    author    = {Vyas, Apoorv and Chang, Heng-Jui and Yang, Cheng-Fu and Huang, Po-Yao and Gao, Luya and Richter, Julius and Chen, Sanyuan and Le, Matthew and Doll{\'a}r, Piotr and Feichtenhofer, Christoph and Lee, Ann and Hsu, Wei-Ning},
    title     = {Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30172--30182}
}
NS-Diff: Fluid Navier-Stokes Guided Video Diffusion via Reinforcement Learning: Zijun Deng,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
    author    = {Deng, Zijun and Peng, Yuxin},
    title     = {{NS-Diff}: Fluid {Navier-Stokes} Guided Video Diffusion via Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43218--43227}
}
EpiAgent: An Agent-Centric System for Ancient Inscription Restoration: Shipeng Zhu,

Ang Chen,

Na Nie,

Pengfei Fang,

Min-Ling Zhang,

Hui Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Shipeng and Chen, Ang and Nie, Na and Fang, Pengfei and Zhang, Min-Ling and Xue, Hui},
    title     = {{EpiAgent}: An Agent-Centric System for Ancient Inscription Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39304--39313}
}
Diffusion-Based sRGB Real Noise Generation via Prompt-Driven Noise Representation Learning: Jaekyun Ko,

Dongjin Kim,

Soomin Lee,

Guanghui Wang,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2026_CVPR,
    author    = {Ko, Jaekyun and Kim, Dongjin and Lee, Soomin and Wang, Guanghui and Kim, Tae Hyun},
    title     = {Diffusion-Based {sRGB} Real Noise Generation via Prompt-Driven Noise Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35956--35966}
}
DiGraphHal-Bench: Evaluating Multimodal Large Language Models on Complex Directed Graphs: Yixin Fan,

Zhao He,

Yuxin Hou,

Changhua Zhou,

Zihao Liu,

Peng Wang,

Chenglong Lu,

Xu Zhang,

Wei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Yixin and He, Zhao and Hou, Yuxin and Zhou, Changhua and Liu, Zihao and Wang, Peng and Lu, Chenglong and Zhang, Xu and Wang, Wei},
    title     = {{DiGraphHal-Bench}: Evaluating Multimodal Large Language Models on Complex Directed Graphs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30885--30894}
}
MERG3R: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry: Leo Kaixuan Cheng,

Abdus Shaikh,

Ruofan Liang,

Zhijie Wu,

Yushi Guan,

Nandita Vijaykumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Leo Kaixuan and Shaikh, Abdus and Liang, Ruofan and Wu, Zhijie and Guan, Yushi and Vijaykumar, Nandita},
    title     = {{MERG3R}: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28969--28978}
}
Alert-CLIP: Abnormality-aware Latent-Enhanced Representation Tuning of CLIP for Video Anomaly Detection: Yiyan Zhu,

Menghao Zhang,

Haifeng Sun,

Pengfei Ren,

Xianao Chu,

Chenye Xu,

Hong Tan,

Jinghan Wang,

Qi Qi,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yiyan and Zhang, Menghao and Sun, Haifeng and Ren, Pengfei and Chu, Xianao and Xu, Chenye and Tan, Hong and Wang, Jinghan and Qi, Qi and Wang, Jingyu},
    title     = {{Alert-CLIP}: Abnormality-aware Latent-Enhanced Representation Tuning of {CLIP} for Video Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35545--35554}
}
Dropping Anchor and Spherical Harmonics for Sparse-view Gaussian Splatting: Shuangkang Fang,

I-Chao Shen,

Xuanyang Zhang,

Zesheng Wang,

Yufeng Wang,

Wenrui Ding,

Gang Yu,

Takeo Igarashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
    author    = {Fang, Shuangkang and Shen, I-Chao and Zhang, Xuanyang and Wang, Zesheng and Wang, Yufeng and Ding, Wenrui and Yu, Gang and Igarashi, Takeo},
    title     = {Dropping Anchor and Spherical Harmonics for Sparse-view {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33312--33322}
}
MICo-150K: A Comprehensive Dataset Advancing Multi-Image Composition: Xinyu Wei,

Kangrui Cen,

Hongyang Wei,

Zhen Guo,

Bairui Li,

Zeqing Wang,

Jinrui Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Xinyu and Cen, Kangrui and Wei, Hongyang and Guo, Zhen and Li, Bairui and Wang, Zeqing and Zhang, Jinrui and Zhang, Lei},
    title     = {{MICo-150K}: A Comprehensive Dataset Advancing Multi-Image Composition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29695--29706}
}
MedTVT-R1: A Multimodal LLM Empowering Medical Reasoning and Diagnosis: Yuting Zhang,

Kaishen Yuan,

Hao Lu,

Yutao Yue,

Jintai Chen,

Kaishun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yuting and Yuan, Kaishen and Lu, Hao and Yue, Yutao and Chen, Jintai and Wu, Kaishun},
    title     = {{MedTVT-R1}: A Multimodal {LLM} Empowering Medical Reasoning and Diagnosis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35248--35259}
}
Good Can Sometimes be Bad: A Unified Attack against 3D Point Cloud Classifier by a Flexible Isotropic Resampling: Linkun Fan,

Jiahao Zhang,

Juntao Zhang,

Lei Zhang,

Fazhi He,

Daojun Han; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Linkun and Zhang, Jiahao and Zhang, Juntao and Zhang, Lei and He, Fazhi and Han, Daojun},
    title     = {Good Can Sometimes be Bad: A Unified Attack against {3D} Point Cloud Classifier by a Flexible Isotropic Resampling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42246--42256}
}
CryoHype: Reconstructing a thousand cryo-EM structures with transformer-based hypernetworks: Jeffrey Gu,

Minkyu Jeon,

Ambri Ma,

Serena Yeung-Levy,

Ellen D. Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
    author    = {Gu, Jeffrey and Jeon, Minkyu and Ma, Ambri and Yeung-Levy, Serena and Zhong, Ellen D.},
    title     = {{CryoHype}: Reconstructing a thousand {cryo-EM} structures with transformer-based hypernetworks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35280--35290}
}
Measuring the (Un)Faithfulness of Concept-Based Explanations: Shubham Kumar,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
    author    = {Kumar, Shubham and Ahuja, Narendra},
    title     = {Measuring the (Un)Faithfulness of Concept-Based Explanations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38990--39000}
}
Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality: Zekai Luo,

Zongze Du,

Zhouhang Zhu,

Hao Zhong,

Muzhi Zhu,

Wen Wang,

Yuling Xi,

Chenchen Jing,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Zekai and Du, Zongze and Zhu, Zhouhang and Zhong, Hao and Zhu, Muzhi and Wang, Wen and Xi, Yuling and Jing, Chenchen and Chen, Hao and Shen, Chunhua},
    title     = {Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40653--40663}
}
BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates: Phuong-Anh Nguyen,

Tien Anh Pham,

Duc-Trong Le,

Cam-Van Thi Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Phuong-Anh and Pham, Tien Anh and Le, Duc-Trong and Nguyen, Cam-Van Thi},
    title     = {{BALM}: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30246--30256}
}
PhysGM: Large Physical Gaussian Model for Feed-Forward 4D Synthesis: Chunji Lv,

Zequn Chen,

Donglin Di,

Weinan Zhang,

Hao Li,

Chen Wei,

Yinjie Lei,

Changsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
    author    = {Lv, Chunji and Chen, Zequn and Di, Donglin and Zhang, Weinan and Li, Hao and Wei, Chen and Lei, Yinjie and Li, Changsheng},
    title     = {{PhysGM}: Large Physical {Gaussian} Model for Feed-Forward {4D} Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29855--29865}
}
High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning: Jia Wu,

Yijing Dai,

Tingfeng Cao,

Meiling Wu,

Tao Luo,

Jian Dong Zhang,

Guangming Lu,

Xiaoyi Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Jia and Dai, Yijing and Cao, Tingfeng and Wu, Meiling and Luo, Tao and Zhang, Jian Dong and Lu, Guangming and Zeng, Xiaoyi},
    title     = {High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35821--35830}
}
Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval: Jingjing Zhang,

Lei Zhang,

Zheren Fu,

Bo Hu,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jingjing and Zhang, Lei and Fu, Zheren and Hu, Bo and Mao, Zhendong},
    title     = {Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33881--33890}
}
Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification: Kunlun Xu,

Haotong Cheng,

Jiangmeng Li,

Xu Zou,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Kunlun and Cheng, Haotong and Li, Jiangmeng and Zou, Xu and Zhou, Jiahuan},
    title     = {Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40397--40406}
}
Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging: Zhilin Zhu,

Yabin Wang,

Zhiheng Ma,

Yaguang Song,

Yaowei Wang,

Xiaopeng Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Zhilin and Wang, Yabin and Ma, Zhiheng and Song, Yaguang and Wang, Yaowei and Hong, Xiaopeng},
    title     = {Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32322--32333}
}
PS-SR: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion: Aiqiu Wu,

Zhaofan Qiu,

Ting Yao,

Tao Mei; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Aiqiu and Qiu, Zhaofan and Yao, Ting and Mei, Tao},
    title     = {{PS-SR}: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38218--38227}
}
ARES: Unifying Asymmetric RGB-Event Stereo for Probabilistic Scene Flow Estimation: Jie Long Lee,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jie Long and Lee, Gim Hee},
  title     = {{ARES}: Unifying Asymmetric {RGB}-Event Stereo for Probabilistic Scene Flow Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37022--37031},
}
SelfHVD: Self-Supervised Handheld Video Deblurring: Honglei Xu,

Zhilu Zhang,

Junjie Fan,

Xiaohe Wu,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Xu_2026_CVPR is also used in this file by the entry for
% "Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person
% Re-Identification". Keys must be unique; it is left unchanged here so existing
% citations keep resolving.
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Honglei and Zhang, Zhilu and Fan, Junjie and Wu, Xiaohe and Zuo, Wangmeng},
  title     = {{SelfHVD}: Self-Supervised Handheld Video Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37486--37495},
}
RFDM: Residual Flow Diffusion Models for Video Editing: Mohammadreza Salehi,

Mehdi Noroozi,

Luca Morreale,

Ruchika Chavhan,

Malcolm Chadwick,

Alberto Gil Couto Pimentel Ramos,

Abhinav Mehrotra; [pdf] [supp]
[bibtex]
@InProceedings{Salehi_2026_CVPR,
  author    = {Salehi, Mohammadreza and Noroozi, Mehdi and Morreale, Luca and Chavhan, Ruchika and Chadwick, Malcolm and Gil Couto Pimentel Ramos, Alberto and Mehrotra, Abhinav},
  title     = {{RFDM}: Residual Flow Diffusion Models for Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43514--43524},
}
The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers: Wei Tao,

Yang Dai,

Jincai Huang,

Qing Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Wei and Dai, Yang and Huang, Jincai and Tao, Qing},
  title     = {The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42300--42309},
}
Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive Mamba for Compressive Sensing: Le Yang,

Hongping Gan; [pdf] [supp]
[bibtex]
% NOTE(review): the key Yang_2026_CVPR is also used in this file by the entry for
% "GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation".
% Keys must be unique; it is left unchanged here so existing citations keep resolving.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Le and Gan, Hongping},
  title     = {Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive {Mamba} for Compressive Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41794--41803},
}
OmniVGGT: Omni-Modality Driven Visual Geometry Grounded Transformer: Haosong Peng,

Hao Li,

Yalun Dai,

Yushi Lan,

Yihang Luo,

Tianyu Qi,

Zhengshen Zhang,

Yufeng Zhan,

Junfei Zhang,

Wenchao Xu,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Haosong and Li, Hao and Dai, Yalun and Lan, Yushi and Luo, Yihang and Qi, Tianyu and Zhang, Zhengshen and Zhan, Yufeng and Zhang, Junfei and Xu, Wenchao and Liu, Ziwei},
  title     = {{OmniVGGT}: Omni-Modality Driven Visual Geometry Grounded Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36485--36497},
}
Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding: Zheng Wang,

Haoran Chen,

Haoxuan Qin,

Zhipeng Wei,

Tianwen Qian,

Cong Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zheng and Chen, Haoran and Qin, Haoxuan and Wei, Zhipeng and Qian, Tianwen and Bai, Cong},
  title     = {Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33784--33793},
}
Towards Robust Sequential Decomposition for Complex Image Editing: Zilai Zeng,

Mingdeng Cao,

Zijie Li,

Xiaochen Lian,

Yichun Shi,

Peihao Zhu,

Chen Sun,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Zilai and Cao, Mingdeng and Li, Zijie and Lian, Xiaochen and Shi, Yichun and Zhu, Peihao and Sun, Chen and Wang, Peng},
  title     = {Towards Robust Sequential Decomposition for Complex Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38101--38110},
}
GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation: Jiahao Yang,

Zihan Wang,

Xiangyang Li,

Xing Zhu,

Yujun Shen,

Yinghao Xu,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Yang_2026_CVPR is also used in this file by the entry for
% "Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive Mamba for
% Compressive Sensing". Keys must be unique; it is left unchanged here so existing
% citations keep resolving.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiahao and Wang, Zihan and Li, Xiangyang and Zhu, Xing and Shen, Yujun and Xu, Yinghao and Jiang, Shuqiang},
  title     = {{GA-VLN}: Geometry-Aware {BEV} Representation for Efficient Vision-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40053--40063},
}
Learning Explicit Continuous Motion Representation for Dynamic Gaussian Splatting from Monocular Videos: Xuankai Zhang,

Junjin Xiao,

Shangwei Huang,

Wei-shi Zheng,

Qing Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2026_CVPR is also used in this file by the entry for
% "Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval".
% Keys must be unique; it is left unchanged here so existing citations keep resolving.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuankai and Xiao, Junjin and Huang, Shangwei and Zheng, Wei-shi and Zhang, Qing},
  title     = {Learning Explicit Continuous Motion Representation for Dynamic {Gaussian} Splatting from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33291--33300},
}
Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning: Hao Zhou,

Tiru Wu,

Yan Jiang,

Wanqi Zhou,

Junxing Hu,

Ai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Hao and Wu, Tiru and Jiang, Yan and Zhou, Wanqi and Hu, Junxing and Han, Ai},
  title     = {Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42331--42340},
}
MotionV2V: Editing Motion in a Video: Ryan Burgert,

Charles Herrmann,

Forrester Cole,

Michael S Ryoo,

Neal Wadhwa,

Andrey Voynov,

Nataniel Ruiz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Burgert_2026_CVPR,
  author    = {Burgert, Ryan and Herrmann, Charles and Cole, Forrester and Ryoo, Michael S and Wadhwa, Neal and Voynov, Andrey and Ruiz, Nataniel},
  title     = {{MotionV2V}: Editing Motion in a Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35988--35997},
}
Life-IQA: Boosting Blind Image Quality Assessment through GCN-enhanced Layer Interaction and MoE-based Feature Decoupling: Long Tang,

Huiyu Duan,

Guoquan Zheng,

Jianbo Zhang,

Jie Hao,

Liang Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Long and Duan, Huiyu and Zheng, Guoquan and Zhang, Jianbo and Hao, Jie and Yuan, Liang},
  title     = {{Life-IQA}: Boosting Blind Image Quality Assessment through {GCN}-enhanced Layer Interaction and {MoE}-based Feature Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29866--29876},
}
CoT-Edit: Let CoT Guide Instruction Video Editing: Sen Liang,

Fengbin Guan,

Youliang Zhang,

Xin Li,

Zhibo Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Sen and Guan, Fengbin and Zhang, Youliang and Li, Xin and Chen, Zhibo},
  title     = {{CoT-Edit}: Let {CoT} Guide Instruction Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37960--37970},
}
Sparse Spectral LoRA: Routed Experts for Medical VLMs: Omid Nejatimanzari,

Hojat Asgariandehkordi,

Taha Koleilat,

Yiming Xiao,

Hassan Rivaz; [pdf] [supp]
[bibtex]
@InProceedings{Nejatimanzari_2026_CVPR,
  author    = {Nejatimanzari, Omid and Asgariandehkordi, Hojat and Koleilat, Taha and Xiao, Yiming and Rivaz, Hassan},
  title     = {Sparse Spectral {LoRA}: Routed Experts for Medical {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35351--35362},
}
PointTPA: Dynamic Network Parameter Adaptation for 3D Scene Understanding: Siyuan Liu,

Chaoqun Zheng,

Xin Zhou,

Tianrui Feng,

Dingkang Liang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Siyuan and Zheng, Chaoqun and Zhou, Xin and Feng, Tianrui and Liang, Dingkang and Bai, Xiang},
  title     = {{PointTPA}: Dynamic Network Parameter Adaptation for {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36571--36581},
}
Can Natural Image Autoencoders Compactly Tokenize fMRI Volumes for Long-Range Dynamics Modeling?: Peter Yongho Kim,

Juhyeon Park,

Jungwoo Park,

Jubin Choi,

Jungwoo Seo,

Jiook Cha,

Taesup Moon; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Kim_2026_CVPR is also used in this file by the entry for
% "Dexterous World Models". Keys must be unique; it is left unchanged here so
% existing citations keep resolving.
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Peter Yongho and Park, Juhyeon and Park, Jungwoo and Choi, Jubin and Seo, Jungwoo and Cha, Jiook and Moon, Taesup},
  title     = {Can Natural Image Autoencoders Compactly Tokenize {fMRI} Volumes for Long-Range Dynamics Modeling?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35321--35330},
}
Dexterous World Models: Byungjun Kim,

Taeksoo Kim,

Junyoung Lee,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Kim_2026_CVPR is also used in this file by the entry for
% "Can Natural Image Autoencoders Compactly Tokenize fMRI Volumes for Long-Range
% Dynamics Modeling?". Keys must be unique; it is left unchanged here so existing
% citations keep resolving.
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Byungjun and Kim, Taeksoo and Lee, Junyoung and Joo, Hanbyul},
  title     = {Dexterous World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29663--29673},
}
You Only Erase Once: Erasing Anything without Bringing Unexpected Content: Yixing Zhu,

Qing Zhang,

Wenju Xu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhu_2026_CVPR is also used in this file by the entry for
% "Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through
% Dynamic Style Bridging". Keys must be unique; it is left unchanged here so existing
% citations keep resolving.
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yixing and Zhang, Qing and Xu, Wenju and Zheng, Wei-Shi},
  title     = {You Only Erase Once: Erasing Anything without Bringing Unexpected Content},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43197--43207},
}
EditMGT: Unleashing Potentials of Masked Generative Transformers in Image Editing: Wei Chow,

Linfeng Li,

Lingdong Kong,

Zefeng Li,

Qi Xu,

Hang Song,

Tian Ye,

Xian Wang,

Jinbin Bai,

Shilin Xu,

Xiangtai Li,

Junting Pan,

Shaoteng Liu,

Ran Zhou,

Tianshu Yang,

Songhua Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chow_2026_CVPR,
  author    = {Chow, Wei and Li, Linfeng and Kong, Lingdong and Li, Zefeng and Xu, Qi and Song, Hang and Ye, Tian and Wang, Xian and Bai, Jinbin and Xu, Shilin and Li, Xiangtai and Pan, Junting and Liu, Shaoteng and Zhou, Ran and Yang, Tianshu and Liu, Songhua},
  title     = {{EditMGT}: Unleashing Potentials of Masked Generative Transformers in Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38038--38048},
}; Back