Papers
- Back
Choreographing a World of Dynamic Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Yanzhe and Geng, Chen and Dharmarajan, Karthik and Zhang, Yunzhi and Alzayer, Hadi and Wu, Shangzhe and Wu, Jiajun},
  title     = {Choreographing a World of Dynamic Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32647--32658},
}
HybridDriveVLA: Vision-Language-Action Model with Visual CoT reasoning and ToT Evaluation for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Bassole_2026_CVPR,
  author    = {Bassole, Yipene Cedric Francois and Kim, Sungwoo and Jung, Jiwoo and Sung, Yunsick},
  title     = {{HybridDriveVLA}: Vision-Language-Action Model with Visual {CoT} reasoning and {ToT} Evaluation for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32421--32430},
}
Catalyst4D: High-Fidelity 3D-to-4D Scene Editing via Dynamic Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shifeng and Li, Yihui and Liao, Jun and Yang, Hongyu and Huang, Di},
  title     = {{Catalyst4D}: High-Fidelity {3D}-to-{4D} Scene Editing via Dynamic Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29792--29802},
}
Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zaishuo and Lu, Yukuan and Li, Xinyi and Xu, Yifan and Chen, Yubei},
  title     = {Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32602--32612},
}
CME-CAD: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for CAD Code Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Ke and Yu, Haiyang and Chen, Zhuofan and Yao, Zhengtao and Jia, Weitao and Ge, Xiaodong and Tang, Jingqun and Cui, Benlei and Li, Bin and Xue, Xiangyang},
  title     = {{CME-CAD}: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for {CAD} Code Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39272--39281},
}
SynCLIP: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Mingjie and He, Guangjun and Xu, Dongli and Lin, Youtian and Li, Hongjue and Feng, Pengming and Guan, Jian and Deng, Yue},
  title     = {{SynCLIP}: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31524--31533},
}
More than the Sum: Panorama-Language Models for Adverse Omni-Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Weijia and Liu, Ruiping and Wei, Jiale and Chen, Yufan and Zheng, Junwei and Zeng, Zichao and Zhang, Jiaming and Li, Qiufu and Shen, Linlin and Stiefelhagen, Rainer},
  title     = {More than the Sum: Panorama-Language Models for Adverse Omni-Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30874--30884},
}
Anomaly-Related Residual Fields for Cross-domain Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Kewei and Xie, Jiayi and Shen, Zhengda and Qin, Weijun and Jia, Lingxiang and Chen, Kejia and Feng, Zunlei and Bei, Yijun},
  title     = {Anomaly-Related Residual Fields for Cross-domain Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35617--35627},
}
DreamSR: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qingji and Dong, Hang and Chen, Mingqin and Zhang, Rui and Wang, Yitong},
  title     = {{DreamSR}: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38258--38269},
}
VisRef: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosal_2026_CVPR,
  author    = {Ghosal, Soumya Suvra and Kim, Youngeun and Li, Zhuowei and Chaudhry, Ritwick and Xu, Linghan and Zhang, Hongjing and Zablocki, Jakub and Xing, Yifan and Zhang, Qin},
  title     = {{VisRef}: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33404--33414},
}
Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Yi and Yang, Lei and Chen, Bofan and Wang, Shilin},
  title     = {Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35462--35471},
}
Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wei-Jin and Zhang, Yue-Yi and Wei, Yi-Lin and Xia, Zhi-Wei and Tan, Juantao and Li, Yuan-Ming and Zhao, Zhilin and Zheng, Wei-Shi},
  title     = {Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30740--30749},
}
Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ahmed_2026_CVPR,
  author    = {Ahmed, Fatmaelzahraa and Lu, Zhihe and Caro, Gianni and Tabaa, Diram and Hamdy, Mohamed and Abdel-Ghani, Muraam and Al-Ali, Abdulaziz and Arsalan, Muhammad and Balakrishnan, Shidin},
  title     = {Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42137--42146},
}
Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights-
[pdf]
[supp]
[bibtex]@InProceedings{Mori_2026_CVPR,
  author    = {Mori, Masafumi and Gongyo, Shinya and Ambai, Mitsuru},
  title     = {Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33609--33620},
}
PR-IQA: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Inseong and Lee, Siwoo and Nam, Seung-Hun and Song, Soohwan},
  title     = {{PR-IQA}: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37301--37310},
}
Fine-VAD: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Menghao and Zhu, Yiyan and Ren, Pengfei and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Wang, Huazheng and Zhang, Lei and Liao, Jianxin and Wang, Jingyu},
  title     = {{Fine-VAD}: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35514--35523},
}
CausalVAD: De-confounding End-to-End Autonomous Driving via Causal Intervention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jiacheng and Zhou, Zhiyuan and He, Zhuolin and Zhang, Jia and Zhang, Kai and Pu, Jian},
  title     = {{CausalVAD}: De-confounding End-to-End Autonomous Driving via Causal Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32124--32133},
}
ReGenHOI: Unifying Reconstruction and Generation for 3D Human-Object Interaction Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Miao and Zhu, Xiangyu and Wang, Zidu and Liang, Xusheng and Li, Bao and Wu, Jinlin and Zang, Zelin and Lei, Zhen},
  title     = {{ReGenHOI}: Unifying Reconstruction and Generation for {3D} Human-Object Interaction Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42847--42857},
}
Masked Region Transformer for Layered Image Generation and Editing at Scale-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhicong and Chen, Jingye and Zhang, Zhao and Zhou, Mohan and Liu, Yuchi and Pu, Yifan and Bai, Yalong and Smith, Ethan and Yuan, Yuhui},
  title     = {Masked Region Transformer for Layered Image Generation and Editing at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40622--40632},
}
DROID-SLAM in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Moyang and Zhu, Zihan and Pollefeys, Marc and Barath, Daniel},
  title     = {{DROID-SLAM} in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36498--36508},
}
Intrinsic Concept Extraction Based on Compositional Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hanyu and Tao, Hong and Huang, Guoheng and Jiang, Jianbin and Chen, Xuhang and Pun, Chi-Man and Wang, Shanhu and Pan, Pan},
  title     = {Intrinsic Concept Extraction Based on Compositional Interpretability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38969--38978},
}
Kontinuous Kontext: Continuous Strength Control for Instruction-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2026_CVPR,
  author    = {Parihar, Rishubh and Patashnik, Or and Ostashev, Daniil and Radhakrishnan, Venkatesh Babu and Cohen-Or, Daniel and Wang, Kuan-Chieh Jackson},
  title     = {{Kontinuous Kontext}: Continuous Strength Control for Instruction-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37929--37939},
}
ExtrinSplat: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Jiayu and Liu, Xinpeng and Pan, Zhiyi and Long, Shiqiang and Li, Ge},
  title     = {{ExtrinSplat}: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31019--31028},
}
StyleDoctor: Towards Specialist Reward Model for Style-centric Generation Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Xilin and Xian, Xiaole and Yue, Xiangyu and Khan, Muhammad Haris},
  title     = {{StyleDoctor}: Towards Specialist Reward Model for Style-centric Generation Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29126--29135},
}
From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ling and Lu, Yunfan and Ma, Wenzong and Yao, Huizai and Li, Pengteng and Xiong, Hui},
  title     = {From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34028--34039},
}
Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jun and Xiong, Lizhi and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie and Li, Yong and Xie, Guo-Sen},
  title     = {Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37653--37663},
}
FlowMotion: Training-Free Flow Guidance for Video Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhen and Xu, Youcan and Xiao, Jun and Chen, Long},
  title     = {{FlowMotion}: Training-Free Flow Guidance for Video Motion Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38143--38153},
}
Visual Diffusion Models are Geometric Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goren_2026_CVPR,
  author    = {Goren, Nir and Yehezkel, Shai and Dahary, Omer and Voynov, Andrey and Patashnik, Or and Cohen-Or, Daniel},
  title     = {Visual Diffusion Models are Geometric Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43187--43196},
}
Image Diffusion Preview with Consistency Solver-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Fu-Yun and Zhou, Hao and Yuan, Liangzhe and Woo, Sanghyun and Gong, Boqing and Han, Bohyung and Yang, Ming-Hsuan and Zhang, Han and Zhu, Yukun and Liu, Ting and Zhao, Long},
  title     = {Image Diffusion Preview with Consistency Solver},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43271--43280},
}
Resolving the Identity Crisis in Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Borse_2026_CVPR,
  author    = {Borse, Shubhankar and Farhadzadeh, Farzad and Hayat, Munawar and Porikli, Fatih},
  title     = {Resolving the Identity Crisis in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36703--36712},
}
High-Quality and Efficient Turbulence Mitigation with Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaoran and Ding, Jian and Duan, Yuxing and Liu, Haoyue and Chen, Gang and Chang, Yi and Yan, Luxin},
  title     = {High-Quality and Efficient Turbulence Mitigation with Events},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29514--29525},
}
SEA-Vision: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in Southeast Asia-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Pengfei and Zhao, Xingran and Chen, Juntao and Hou, Peng and Longchao, Wang and Lin, Jianghang and Zhang, Shengchuan and Zeng, Anxiang and Cao, Liujuan},
  title     = {{SEA-Vision}: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in {Southeast Asia}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30895--30905},
}
VITAL: Vision-Encoder-centered Pre-training for LMMs in Visual Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Ziheng and Cao, Linhan and Han, Jinliang and Zhang, Zicheng and Qian, Jiaying and Wang, Jiarui and Chen, Zijian and Zhai, Guangtao and Min, Xiongkuo},
  title     = {{VITAL}: Vision-Encoder-centered Pre-training for {LMMs} in Visual Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41245--41255},
}
Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Pi_2026_CVPR,
  author    = {Pi, Jiacheng and Yang, Zhiguo and Huang, Xingxing and Xu, Dongsheng and Zhong, Ruizhi and Ruan, Wenjie},
  title     = {Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30087--30097},
}
SHARP: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Prutsch_2026_CVPR,
  author    = {Prutsch, Alexander and Fruhwirth-Reisinger, Christian and Schinagl, David and Possegger, Horst},
  title     = {{SHARP}: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32103--32112},
}
SPAN: Spatial-Projection Alignment for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifan and Zhao, Yian and Pu, Fanqi and Yang, Xiaochen and Tang, Yang and Chen, Xi and Yang, Wenming},
  title     = {{SPAN}: Spatial-Projection Alignment for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40762--40771},
}
Is the Modality Gap a Bug or a Feature? A Robustness Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowers_2026_CVPR,
  author    = {Chowers, Rhea and Naparstek, Oshri and Barzelay, Udi and Weiss, Yair},
  title     = {Is the Modality Gap a Bug or a Feature? {A} Robustness Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30288--30298},
}
Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Yin, Jia-Li and Lin, Luojun and Lin, Wei},
  title     = {Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30141--30150},
}
Explore with Long-term Memory: A Benchmark and Multimodal LLM-based Reinforcement Learning Framework for Embodied Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sen and Liu, Bangwei and Gao, Zhenkun and Ma, Lizhuang and Wang, Xuhong and Xie, Yuan and Tan, Xin},
  title     = {Explore with Long-term Memory: A Benchmark and Multimodal {LLM}-based Reinforcement Learning Framework for Embodied Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37098--37108},
}
Multi-Modal Image Fusion via Intervention-Stable Feature Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xue and Guan, Zheng and Qian, Wenhua and Wang, Chengchao and Ma, Runzhuo},
  title     = {Multi-Modal Image Fusion via Intervention-Stable Feature Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33827--33837},
}
Dark3R: Learning Structure from Motion in the Dark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Andrew Y. and Malik, Anagh and Tedla, SaiKiran and Dai, Yutong and Qin, Yiqian and Salehe, Zach and Attal, Benjamin and Nousias, Sotiris and Kutulakos, Kiriakos N. and Lindell, David B.},
  title     = {{Dark3R}: Learning Structure from Motion in the Dark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34160--34170},
}
Efficient and Training-Free Single-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Haojun and Kutulakos, Kiriakos N. and Lindell, David B.},
  title     = {Efficient and Training-Free Single-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36157--36167},
}
Detecting Compressed AI-Generated Images via Phase Spectrum Robustness-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Kai and Ren, Wenqi and Wang, Wei and Cao, Xiaochun},
  title     = {Detecting Compressed {AI}-Generated Images via Phase Spectrum Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35427--35436},
}
Exposing and Evaluating Hallucinations for GUI Grounding-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zicheng and Jing, Hongyi and Lv, Rui and Fang, Shuo and Zhu, Shiai and Wang, Junying and Li, Chunyi and Liu, Xiaohong and Ma, Chenguang and Zhai, Guangtao},
  title     = {Exposing and Evaluating Hallucinations for {GUI} Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40208--40223},
}
Beyond Sequential Tools: A Unified VLM Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Honglin and Zhu, Chenjie and Ding, Jianbiao and Ni, Zixuan and Li, Wei and Mi, Zhenpeng and Wang, Qian},
  title     = {Beyond Sequential Tools: A Unified {VLM} Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41521--41530},
}
LIBERO-Plus: A Progressive Robustness Benchmark for Visual-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2026_CVPR,
  author    = {Fei, Senyu and Wang, Siyin and Shi, Junhao and Dai, Zihao and Cai, Jikun and Qian, Pengfang and Ji, Li and He, Xinzhe and Zhang, Shiduo and Fei, Zhaoye and Fu, Jinlan and Gong, Jingjing and Qiu, Xipeng},
  title     = {{LIBERO-Plus}: A Progressive Robustness Benchmark for Visual-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38574--38583},
}
MajutsuCity: Language-driven Aesthetic-adaptive City Generation with Controllable 3D Assets and Layouts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zilong and He, Jun and Huang, Xiaobin and Xiong, Ziyi and Luo, Yang and Ye, Junyan and Li, Weijia and Chen, Yiping and Han, Ting},
  title     = {{MajutsuCity}: Language-driven Aesthetic-adaptive City Generation with Controllable {3D} Assets and Layouts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31608--31618},
}
OpenDance: Multimodal Controllable 3D Dance Generation with Large-scale Internet Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jinlu and Kang, Zixi and Liu, Libin and Chang, Jianlong and Tian, Qi and Gao, Feng and Wang, Yizhou},
  title     = {{OpenDance}: Multimodal Controllable {3D} Dance Generation with Large-scale Internet Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28860--28870},
}
Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Yunxiang and Ma, Mengmeng and Yao, Ziyu and Peng, Xi},
  title     = {Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38936--38946},
}
AniMimic: Imitating 3D Animation from Video Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tianyi and Chen, Yunuo and Guo, Yaowei and Yang, Yin and Zhou, Bolei and Terzopoulos, Demetri and Jiang, Ying and Jiang, Chenfanfu},
  title     = {{AniMimic}: Imitating {3D} Animation from Video Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40266--40276},
}
Reviving ConvNeXt for Efficient Convolutional Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Taesung and Bianchi, Lorenzo and Wittke, Lennart and Watine, Felix and Carrara, Fabio and Ye, Jong Chul and Weber, Romann and Azevedo, Vinicius},
  title     = {Reviving {ConvNeXt} for Efficient Convolutional Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43675--43685},
}
DetAny4D: Detect Anything 4D Temporally in a Streaming RGB Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Jiawei and Zhang, Shenghao and Wang, Can and Gu, Zheng and Ling, Yonggen and Zeng, Taiping and Xue, Xiangyang and Zhang, Jingbo},
  title     = {{DetAny4D}: Detect Anything {4D} Temporally in a Streaming {RGB} Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32798--32807},
}
Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Xinlei and Peng, Xiulian and Li, Xiao and Xiong, Zhiwei and Lu, Yan},
  title     = {Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32882--32891},
}
CaptionFormer: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Fiastre_2026_CVPR,
  author    = {Fiastre, Gabriel and Yang, Antoine and Schmid, Cordelia},
  title     = {{CaptionFormer}: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39518--39528},
}
OnlinePG: Online Open-Vocabulary Panoptic Mapping with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2026_CVPR,
  author    = {Zhai, Hongjia and Zhang, Qi and Pan, Xiaokun and Zhang, Xiyu and Dong, Yitong and Zhang, Huaqi and Xu, Dan and Zhang, Guofeng},
  title     = {{OnlinePG}: Online Open-Vocabulary Panoptic Mapping with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33269--33279},
}
PyraTok: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Susladkar_2026_CVPR,
  author    = {Susladkar, Onkar and Prakash, Tushar and Juvekar, Adheesh and Nguyen, Kiet A. and Jang, Dong-Hwan and Dhillon, Inderjit S. and Lourentzou, Ismini},
  title     = {{PyraTok}: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37906--37917},
}
UniGeoSeg: Towards Unified Open-World Segmentation for Geospatial Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Shuo and Wang, Di and Chen, He and Guo, Haonan and Zhang, Ning and Zhang, Jing},
  title     = {{UniGeoSeg}: Towards Unified Open-World Segmentation for Geospatial Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34865--34876},
}
ShiftLUT: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Xiaolong and Yu, Yitong and Xiong, Shiyao and Hao, Jinhua and Sun, Ming and Zhou, Chao and Wang, Bin},
  title     = {{ShiftLUT}: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29959--29968},
}
MMSD3.0: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Haochen and Kong, Yuyao and Xu, Yongxiu and Gou, Gaopeng and Xu, Hongbo and Wang, Yubin and Zhang, Haoliang},
  title     = {{MMSD3.0}: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37885--37895},
}
Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR,
  author    = {Su, Weijian and Zhang, Songqian and Han, Yuqi and Zhuang, Jian and Huang, Yongdong and Zhang, Qiang},
  title     = {Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41499--41509},
}
Beyond the Ground Truth: Enhanced Supervision for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ryou_2026_CVPR,
  author    = {Ryou, Donghun and Ha, Inju and Chu, Sanghyeok and Han, Bohyung},
  title     = {Beyond the Ground Truth: Enhanced Supervision for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29949--29958},
}
SineProject: Machine Unlearning for Stable Vision-Language Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Arpit and Saratchandran, Hemanth and Lucey, Simon},
  title     = {{SineProject}: Machine Unlearning for Stable Vision-Language Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31735--31745},
}
Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Ruiyang and Liu, Fang and Jiao, Licheng and Xie, Xinglin and Hao, Jiayao and Li, Shuo and Liu, Xu and Yang, Jingyi and Li, Lingling and Chen, Puhua and Ma, Wenping},
  title     = {Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30011--30020},
}
Language Models Can Explain Visual Features via Steering-
[pdf]
[supp]
[bibtex]@InProceedings{Ferrando_2026_CVPR, author = {Ferrando, Javier and Lopez-Cuena, Enrique and Martin-Torres, Pablo Agustin and Hinjos, Daniel and Arias-Duart, Anna and Garcia-Gasulla, Dario}, title = {Language Models Can Explain Visual Features via Steering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38947-38958} }
PanDA: Unsupervised Domain Adaptation for Multimodal 3D Panoptic Segmentation in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Yining and Li, Shijie and Wu, Yuchen and Yang, Xulei and Zhao, Na}, title = {PanDA: Unsupervised Domain Adaptation for Multimodal 3D Panoptic Segmentation in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33057-33067} }
ReCoFuse: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Hao and Yang, Shuhan and Tang, Linfeng and Yi, Xunpeng and Ma, Jiayi}, title = {ReCoFuse: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33838-33847} }
The Drift Kernel: Why Diffusion Models Change Even When Told Not To-
[pdf]
[supp]
[bibtex]@InProceedings{Ram_2026_CVPR, author = {Ram, Gokul Srinath Seetha and Elavazhagan, Rashmi}, title = {The Drift Kernel: Why Diffusion Models Change Even When Told Not To}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43281-43289} }
Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Nan_2026_CVPR, author = {Nan, Kepan and Zhao, Wangbo and Zhou, Penghao and Li, Jun and Yang, Zhenheng and Yang, Jian and Tai, Ying}, title = {Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43740-43750} }
Personalized Federated Training of Diffusion Models with Privacy Guarantees-
[pdf]
[supp]
[bibtex]@InProceedings{Patel_2026_CVPR, author = {Patel, Kumar Kshitij and Jiang, Bingqing and Kabir, A F M Mahfuzul and Zhang, Weitong and Zou, Difan and Wang, Lingxiao}, title = {Personalized Federated Training of Diffusion Models with Privacy Guarantees}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31790-31801} }
KaLOS finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Tschirschwitz_2026_CVPR, author = {Tschirschwitz, David and Rodehorst, Volker}, title = {KaLOS finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38554-38563} }
Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Dwivedi_2026_CVPR, author = {Dwivedi, Rajeev Ranjan and Dangwal, Anshuman and Kurmi, Vinod K}, title = {Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31683-31692} }
Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jinrong and Xu, Zhaoyang and He, Xusheng and Li, Xinrui and Zheng, Na and Wu, Jianlong}, title = {Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33531-33541} }
Cross-Modal Attention Calibration for LVLM Hallucination Mitigation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiaming and Zhang, Jiacheng and Jie, Zequn and Ma, Lin and Li, Ming and Luo, Xiaonan and Li, Guanbin}, title = {Cross-Modal Attention Calibration for LVLM Hallucination Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40186-40196} }
Human Geometry Distribution for 3D Animation Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Xiangjun and Zhang, Biao and Wonka, Peter}, title = {Human Geometry Distribution for 3D Animation Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38313-38323} }
Moving Border Ownership for Event-based Motion Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hua_2026_CVPR, author = {Hua, Zhiyuan and Ferm{\"u}ller, Cornelia and Aloimonos, Yiannis}, title = {Moving Border Ownership for Event-based Motion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37043-37052} }
Towards Visual Query Localization in the 3D World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Liang and Tan, Bohan and Zhang, Zhipeng and Li, Haobo and Jiao, Yifan and Dong, Xingping and Zhang, Libo}, title = {Towards Visual Query Localization in the 3D World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41406-41415} }
What Is It Like to Be a Noise? An Entropy-based Gaussian Noise Regularization for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Pascal and Lascheit, Kai and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius C.}, title = {What Is It Like to Be a Noise? An Entropy-based Gaussian Noise Regularization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43471-43481} }
PhysVid: Physics Aware Local Conditioning for Generative Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pathak_2026_CVPR, author = {Pathak, Saurabh and Arani, Elahe and Pechenizkiy, Mykola and Zonooz, Bahram}, title = {PhysVid: Physics Aware Local Conditioning for Generative Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41847-41858} }
Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Honglu and Fang, Zhiqin and Zhao, Ningning and Hou, Saihui and Ma, Long and Pei, Renwang and He, Zhaofeng}, title = {Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33566-33576} }
ArtLLM: Generating Articulated Assets via 3D LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Penghao and Xie, Siyuan and Yan, Hongyu and Yang, Xianghui and Huang, Jingwei and Guo, Chunchao and Gu, Jiayuan}, title = {ArtLLM: Generating Articulated Assets via 3D LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34281-34291} }
DDT: Decoupled Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shuai and Tian, Zhi and Huang, Weilin and Wang, Limin}, title = {DDT: Decoupled Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40633-40642} }
Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, JooHyung and Oh, Minyoung and Yoon, Sung Whan and Sim, Jae-Young}, title = {Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40458-40467} }
Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Kaiqiang and Peng, Rui and Wu, Jiahao and Wang, Zhanke and Liang, Jie and Zheng, Xiaoyun and Gao, Feng and Wang, Ronggang}, title = {Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40918-40928} }
SpatialStack: Layered Geometry-Language Fusion for 3D VLM Spatial Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jian and Zhou, Shijie and Liu, Bangya and Kadambi, Achuta and Fan, Zhiwen}, title = {SpatialStack: Layered Geometry-Language Fusion for 3D VLM Spatial Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38678-38688} }
Hint2Gen: Bridging Understanding and Generation via Code-structured Hints-
[pdf]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Yuanpeng and Chen, Yunpeng and Chen, Xi and Li, Liang and Zhao, Hengshuang}, title = {Hint2Gen: Bridging Understanding and Generation via Code-structured Hints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36593-36603} }
Goldilocks Test Sets for Face Verification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haiyu and Tian, Sicong and Bhatta, Aman and Gutierrez, Jacob and Bezold, Grace and Argueta, Genesis and Ricanek, Karl and King, Michael C. and Bowyer, Kevin}, title = {Goldilocks Test Sets for Face Verification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35504-35513} }
Abstract 3D Perception for Spatial Intelligence in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yifan and Zhan, Fangneng and Zhou, Kaichen and Du, Yilun and Liang, Paul Pu and Pfister, Hanspeter}, title = {Abstract 3D Perception for Spatial Intelligence in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38647-38656} }
Learning to Infer Parameterized Representations of Plants from 3D Scans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghrer_2026_CVPR, author = {Ghrer, Samara and Godin, Christophe and Wuhrer, Stefanie}, title = {Learning to Infer Parameterized Representations of Plants from 3D Scans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42236-42245} }
Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boldisor_2026_CVPR, author = {Boldisor, Dragos-Alexandru and Smeu, Stefan and Oneata, Dan and Oneata, Elisabeta}, title = {Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43018-43029} }
TF-SSD: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Zhijin and Jin, Shuo and Yu, Siyue and Wu, Shuwei and Zhang, Bingfeng and Yu, Li and Xiao, Jimin}, title = {TF-SSD: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32216-32225} }
Structure-to-Intensity Diffusion for Adverse-Weather LiDAR Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Peiyang and Yang, Longyu and Zhang, Lu and Saito, Kuniaki and Tan, Yap-Peng and Shen, Fumin and Shen, Heng Tao and Zhu, Xiaofeng and Hu, Ping}, title = {Structure-to-Intensity Diffusion for Adverse-Weather LiDAR Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35904-35914} }
AV-Reasoner: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Lidong and Chen, Guo and Wei, Zhu and Li, Zhiqi and Liu, Yicheng and Lu, Tong}, title = {AV-Reasoner: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33477-33487} }
WRIVINDER: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gudavalli_2026_CVPR, author = {Gudavalli, Chandrakanth and Mohammed, Tajuddin Manhar and Yadav, Abhay and Bhaskar, Ananth Vishnu and Prajapati, Hardik and Peng, Cheng and Chellappa, Rama and Chandrasekaran, Shivkumar and Manjunath, B. S.}, title = {WRIVINDER: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33703-33713} }
VIAFormer: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Tiancheng and Pan, Bowen and Chen, Lingxi and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Wu, Fan}, title = {VIAFormer: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29060-29070} }
End-to-End Language-Action Model for Humanoid Whole Body Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuxuan and Jiang, Haobin and Yao, Shiqing and Ding, Ziluo and Lu, Zongqing}, title = {End-to-End Language-Action Model for Humanoid Whole Body Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38399-38409} }
IDESplat: Iterative Depth Probability Estimation for Generalizable 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Wei and Wu, Haifeng and Jiang, Shiyin and Zhang, Jinhua and Ji, Xinchun and Gu, Shuhang}, title = {IDESplat: Iterative Depth Probability Estimation for Generalizable 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33248-33258} }
Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Borhan_2026_CVPR, author = {Borhan, Uddin Md. and Raza, Arif and Lin, Zhiliang and Wang, Lu and Li, Jianqiang and Chen, Jie}, title = {Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32134-32143} }
LacTokGen: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Qingsong and Zhang, Luyuan and Zhang, Zhao and Li, Siyuan and Huang, Zhe and Yang, Zhenyu and Lu, Haonan}, title = {LacTokGen: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30368-30380} }
BEV-SLD: Self-Supervised Scene Landmark Detection for Global Localization with LiDAR Bird's-Eye View Images-
[pdf]
[supp]
[bibtex]@InProceedings{Skuddis_2026_CVPR, author = {Skuddis, David and Ress, Vincent and Zhang, Wei and Nyako, Vincent Ofosu and Haala, Norbert}, title = {BEV-SLD: Self-Supervised Scene Landmark Detection for Global Localization with LiDAR Bird's-Eye View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31400-31409} }
Dejavu: Towards Experience Feedback Learning for Embodied Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shaokai and Ji, Yanbiao and Li, Qiuchang and Zhang, Zhiyi and He, Qichen and Xie, Wenyuan and Zhang, Guodong and Bayramli, Bayram and Ding, Yue and Lu, Hongtao}, title = {Dejavu: Towards Experience Feedback Learning for Embodied Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29578-29587} }
Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Bolin and Wang, XuDong and Rambhatla, Saketh and Rehg, James M. and Kira, Zsolt and Girdhar, Rohit and Misra, Ishan}, title = {Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43450-43460} }
CORE: Compact Object-centric REpresentations as a New Paradigm for Token Merging in LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Jingyu and Wang, Gaoang and Lee, Der-Horng}, title = {CORE: Compact Object-centric REpresentations as a New Paradigm for Token Merging in LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39593-39605} }
MacTok: Robust Continuous Tokenization for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Hengyu and Gao, Xin and Li, Guanghao and Yan, Yuxiang and Ruan, Jiaoyang and Ma, Junpeng and Wang, Haoyu Albert and Pu, Jian}, title = {MacTok: Robust Continuous Tokenization for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43407-43417} }
MarkushGrapher-2: End-to-end Multimodal Recognition of Chemical Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Strohmeyer_2026_CVPR, author = {Strohmeyer, Tim and Morin, Lucas and Meijer, Gerhard Ingmar and Weber, Valery and Nassar, Ahmed and Staar, Peter}, title = {MarkushGrapher-2: End-to-end Multimodal Recognition of Chemical Structures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31176-31185} }
Scene-VLM: Multimodal Video Scene Segmentation via Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Berman_2026_CVPR, author = {Berman, Nimrod and Botach, Adam and Ben-Baruch, Emanuel and Hakimi, Shunit Haviv and Gendler, Asaf and Naiman, Ilan and Yosef, Erez and Kviatkovsky, Igor}, title = {Scene-VLM: Multimodal Video Scene Segmentation via Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39848-39857} }
CICA: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haoyu and Chen, Xiaoliang and Miao, Duoqian and Qin, Xiaolin and Li, Xianyong and Du, Yajun}, title = {CICA: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37842-37851} }
LongVideo-R1: Smart Navigation for Low-cost Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Jihao and Xie, Lingxi and Huo, Xinyue and Tian, Qi and Ye, Qixiang}, title = {LongVideo-R1: Smart Navigation for Low-cost Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40505-40515} }
HyCal: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Eunju and Kim, MiHyeon and Kwon, JuneHyoung and Lee, Yoonji and Kim, JiHyun and Jang, Soojin and Kim, YoungBin}, title = {HyCal: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29462-29471} }
HDR-VLM: HDR-Domain Adaptation of VLMs and Preference-Aligned Quality Assessment for HDR Video Color Grading-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Hao and Zhang, Jiabin and Wu, Yajing and Pang, Ruixuan and Li, Jing}, title = {HDR-VLM: HDR-Domain Adaptation of VLMs and Preference-Aligned Quality Assessment for HDR Video Color Grading}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40792-40801} }
ReMoRa: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yashima_2026_CVPR, author = {Yashima, Daichi and Kurita, Shuhei and Oda, Yusuke and Sugiura, Komei}, title = {ReMoRa: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31845-31855} }
Seeing without Pixels: Perception from Camera Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Zihui and Grauman, Kristen and Damen, Dima and Zisserman, Andrew and Han, Tengda}, title = {Seeing without Pixels: Perception from Camera Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38836-38847} }
A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Jianlu and Feng, Fu and Xu, Jiaze and Xie, Yucheng and Lv, Jiaqi and Geng, Xin}, title = {A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34535-34545} }
MoECLIP: Patch-Specialized Experts for Zero-shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jun Yeong and Seo, JunYoung and Kang, Minji and Park, Yu Rang}, title = {MoECLIP: Patch-Specialized Experts for Zero-shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35534-35544} }
SLARM: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Zhicheng and Meng, Jiarui and Luo, Tong-an and Huang, Yican and Feng, Xuan and Li, Xuanfu and Xu, Zhan}, title = {SLARM: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29023-29034} }
Unified Camera Positional Encoding for Controlled Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Cheng and Li, Boying and Wei, Meng and Cao, Yan-Pei and Gambardella, Camilo and Phung, Dinh and Cai, Jianfei}, title = {Unified Camera Positional Encoding for Controlled Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38027-38037} }
Toward Low-Cost yet Effective Temporal Learning for UAV Tracking-
[pdf]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Chaocan and Liang, Qihua and Zhong, Bineng and Zu, Yanting and Xue, Yuanliang and Xia, Haiying and Song, Shuxiang}, title = {Toward Low-Cost yet Effective Temporal Learning for UAV Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42538-42548} }
Rethinking Intermediate Representation for VLM-based Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Weiliang and Gao, Jialin and Pan, Jia-Hui and Wang, Gang and Li, Li Erran and Liu, Yun-Hui and Ding, Mingyu and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {Rethinking Intermediate Representation for VLM-based Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29652-29662} }
Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization-
[pdf]
[supp]
[bibtex]@InProceedings{Matinkia_2026_CVPR, author = {Matinkia, Mohammadjavad and Ray, Nilanjan}, title = {Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28775-28785} }
Driving on Registers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kirby_2026_CVPR, author = {Kirby, Ellington and Boulch, Alexandre and Xu, Yihong and Yin, Yuan and Puy, Gilles and Zablocki, {\'E}loi and Bursuc, Andrei and Gidaris, Spyros and Marlet, Renaud and Bartoccioni, Florent and Cao, Anh-Quan and Samet, Nermin and VU, Tuan-Hung and Cord, Matthieu}, title = {Driving on Registers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32058-32069} }
Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Pei and Zeng, Xiangxiang and Ma, Tengfei and Xing, Yucheng and Ren, Xuanbai and Liu, Yiping}, title = {Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35238-35247} }
TokenTrace: Multi-Concept Attribution through Watermarked Token Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Li and Agarwal, Shruti and Collomosse, John and Xie, Pengtao and Asnani, Vishal}, title = {TokenTrace: Multi-Concept Attribution through Watermarked Token Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42942-42952} }
MAD: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2026_CVPR, author = {Chung, Sangyun and Kim, Se Yeon and Chee, Youngchae and Ro, Yong Man}, title = {MAD: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40175-40185} }
MMR-AD: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Xincheng and Qian, Zefeng and Shi, Chao and Song, Jiayang and Zhang, Chongyang}, title = {MMR-AD: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43072-43082} }
DeltaQuant: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xingyang and Tesfai, Samuel and Zhang, Zhekai and Xi, Haocheng and Yang, Shuo and Zhang, Lvmin and Sun, Yufei and Peng, Kelly and Agrawala, Maneesh and Stoica, Ion and Keutzer, Kurt and Zhu, Jun-Yan and Han, Song and Lin, Yujun and Li, Muyang}, title = {DeltaQuant: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43578-43588} }
Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, June Suk and Lee, Kyungmin and Yu, Sihyun and Choi, Yisol and Shin, Jinwoo and Lee, Kimin}, title = {Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40364-40375} }
Otil: Accelerating Diffusion Model Inference via Communication-Efficient Multi-GPU Parallelism-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xin and Tian, Shujun and Lu, Tao and Bao, Han and Wang, Zonghui and Chen, Wenzhi}, title = {Otil: Accelerating Diffusion Model Inference via Communication-Efficient Multi-GPU Parallelism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38487-38497} }
AnomalyVFM -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors-
[pdf]
[supp]
[bibtex]@InProceedings{Fucka_2026_CVPR, author = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel}, title = {AnomalyVFM -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35555-35566} }
VDE: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Junwen and Liang, Jinglin and Chen, Hongyuan and Huang, Shuangping}, title = {VDE: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37918-37928} }
TreeTeaming: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chunxiao and Li, Lijun and Shao, Jing}, title = {TreeTeaming: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37684-37693} }
Learning Latent Concepts for Detecting Out-of-Distribution Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Ting and Dong, Junhao and Ong, Yew-Soon}, title = {Learning Latent Concepts for Detecting Out-of-Distribution Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28723-28733} }
HiCoGen: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Hongji and Zhou, Yucheng and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing}, title = {HiCoGen: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36793-36802} }
QUANTIPHY: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Puyin_2026_CVPR, author = {Puyin, Li and Xiang, Tiange and Mao, Ella and Wei, Shirley and Chen, Xinye and Masood, Adnan and Fei-Fei, Li and Adeli, Ehsan}, title = {QUANTIPHY: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33174-33184} }
From None to All: Self-Supervised 3D Reconstruction via Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Ranran and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian}, title = {From None to All: Self-Supervised 3D Reconstruction via Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37358-37369} }
PLACID: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories-
[pdf]
[supp]
[bibtex]@InProceedings{Tarres_2026_CVPR, author = {Tarr\'es, Gemma Canet and Baradad, Manel and Moreno-Noguer, Francesc and Li, Yumeng}, title = {PLACID: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38060-38070} }
Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Chen and Zhi, Zhuo and Huang, Zhao and Ge, Jiawei and Xiao, Ling and Sebe, Nicu and Tzimiropoulos, Georgios and Patras, Ioannis}, title = {Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34512-34523} }
Temporal Interaction in Spiking Transformers with Multi-Delay Mixer-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Kexin and Liu, Hanwen and Song, Zeyang and Liu, Yang and Zhang, Jieyuan and Wang, Shuai and Wu, Jibin and Zhang, Malu and Yang, Yang}, title = {Temporal Interaction in Spiking Transformers with Multi-Delay Mixer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34555-34565} }
Sketch2Colab: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Daiya_2026_CVPR, author = {Daiya, Divyanshu and Bera, Aniket}, title = {Sketch2Colab: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30675-30685} }
Efficient Equivariant Transformer for Self-Driving Agent Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Scott and Chen, Dian and Wong, Kelvin and Zhang, Chris and Fallah, Kion and Urtasun, Raquel}, title = {Efficient Equivariant Transformer for Self-Driving Agent Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32164-32172} }
Temporal Representation Enhancement (TRE): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Wei and Yang, Li and Wang, Yufei and Xiao, Han and Cai, Boyu and Hu, Weiming}, title = {Temporal Representation Enhancement (TRE): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34480-34490} }
Paper2Figure: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Siwei and Ji, Haonian and Xin, Siyang and Shi, Juanquan and Qiu, Shi and Ye, Xinyu and Xia, Peng and Liu, Jiaqi and Chen, Zhaorun and Zhou, Yiyang and Li, Linjie and Wang, Lijuan and Yao, Huaxiu}, title = {Paper2Figure: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29157-29166} }
OctoT2I: A Self-Evolving Agentic Text-to-Image Router-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xu and Chen, Bin and Li, Gehui and Duan, Yule and Wang, Ronggang and Zhang, Jian}, title = {OctoT2I: A Self-Evolving Agentic Text-to-Image Router}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31628-31638} }
Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuzhuang and Tian, Xiaolin and Sun, Qigong}, title = {Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32389-32398} }
YOSE: You Only Select Essential Tokens for Efficient DiT-based Video Object Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chenyang and Lei, Lina and Li, Fan and Guo, Chunle and Kong, Dehong and Qin, Xinran and Wang, Zhixin and Cheng, Mingming and Li, Chongyi}, title = {YOSE: You Only Select Essential Tokens for Efficient DiT-based Video Object Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32926-32935} }
Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Sha and Pan, Jiao and Guo, Yu and Yao, Chao}, title = {Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37528-37537} }
MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Peiqing and Zhou, Shangchen and Hao, Kai and Tao, Qingyi}, title = {MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37476-37485} }
Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Yue and Yang, Siqi and Zhong, Ting and Zhou, Fan}, title = {Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29727-29737} }
VideoCoF: Unified Video Editing with Temporal Reasoner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiangpeng and Xie, Ji and Yang, Yiyuan and Ma, Yue and Huang, Yan and Xu, Min and Wu, Qiang}, title = {VideoCoF: Unified Video Editing with Temporal Reasoner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37940-37949} }
Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hongyuan and Chen, Xingyu and Xu, Zexiang and Chen, Anpei}, title = {Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28947-28958} }
Generative Modeling of Weights: Generalization or Memorization?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Boya and Yin, Yida and Xu, Zhiqiu and Liu, Zhuang}, title = {Generative Modeling of Weights: Generalization or Memorization?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41974-41984} }
Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2026_CVPR, author = {Yi, Shuai and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29494-29503} }
MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Ruoxiang and Yuan, Zhen}, title = {MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31534-31543} }
CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2026_CVPR, author = {Kuang, Zhaonian and Ding, Rui and Wang, Haotian and Zheng, Xinhu and Yang, Meng and Hua, Gang}, title = {CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40707-40716} }
StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Junlin and Zheng, Quanlong and Zhang, Ruifei and Wang, Kuo and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Wan, Xiang and Li, Guanbin}, title = {StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38870-38879} }
Improving Vision-language Models with Perception-centric Process Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2026_CVPR, author = {Min, Yingqian and Zhou, Kun and Li, Yifan and Wu, Yuhuan and Peng, Han and Du, Yifan and Zhao, Wayne Xin and Yang, Min and Wen, Ji-Rong}, title = {Improving Vision-language Models with Perception-centric Process Reward Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33099-33109} }
Spatial Matters: Position-Guided 3D Referring Expression Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yabing and Tian, Zhuotao and Wang, Le and Qin, Zheng and Zhou, Sanping}, title = {Spatial Matters: Position-Guided 3D Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39486-39496} }
ReHyAt: Recurrent Hybrid Attention for Video Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghafoorian_2026_CVPR, author = {Ghafoorian, Mohsen and Habibian, Amirhossein}, title = {ReHyAt: Recurrent Hybrid Attention for Video Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40674-40684} }
MultiCrafter: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Jiang, Yibo and Lu, Yehao and Wang, Zhizhong and Huang, Zeyi and Qin, Zequn and Li, Xi}, title = {MultiCrafter: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36691-36702} }
Benchmarking PhD-Level Coding in 3D Geometric Computer Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wenyi and Luo, Renkai and Yu, Yue and Gao, Huan-ang and Gao, Mingju and Yuan, Li and Fu, Chaoyou and Zhao, Hao}, title = {Benchmarking PhD-Level Coding in 3D Geometric Computer Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30974-30985} }
HP-Edit: A Human-Preference Post-Training Framework for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Fan and Wang, Chonghuinan and Lei, Lina and Qiu, Yuping and Xu, Jiaqi and Jiang, Jiaxiu and Qin, Xinran and Chen, Zhikai and Song, Fenglong and Wang, Zhixin and Pei, Renjing and Zuo, Wangmeng}, title = {HP-Edit: A Human-Preference Post-Training Framework for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43113-43123} }
SocialNav: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ziyi and Guo, Yingnan and Chu, Zedong and Luo, Minghua and Shen, Yanfen and Sun, Mingchao and Hu, Junjun and Xie, Shichao and Kuan, Yang and Shi, Pei and Gu, Zhining and Liu, Lu and Han, Honglin and Wu, Xiaolong and Xu, Mu and Zhang, Yu}, title = {SocialNav: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28796-28806} }
MSRL: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenglong and Huo, Yifu and Gan, Yang and He, Qiaozhi and Meng, Qi and Li, Bei and Wang, Yan and Liu, Junfu and Zhou, Tianhua and Zhu, Jingbo and Xiao, Tong}, title = {MSRL: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29410-29420} }
PixelRush: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Hong-Phuc and Nguyen, Phong and Tran, Anh}, title = {PixelRush: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35946-35955} }
Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Xinxuan and Fowlkes, Charless and Berg, Alexander C.}, title = {Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29222-29232} }
It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Harrington_2026_CVPR, author = {Harrington, Anne and Koepke, A. Sophia and Karthik, Shyamgopal and Darrell, Trevor and Efros, Alexei A.}, title = {It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43124-43134} }
CamDirector: Towards Long-Term Coherent Video Trajectory Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Kejia and Shi, Zhihao and Wan, Weilin and Zhou, Yuhongze and Yu, Yuanhao and Zuo, Xinxin and Sun, Qiang and Lu, Juwei}, title = {CamDirector: Towards Long-Term Coherent Video Trajectory Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32683-32692} }
SpiderCam: Low-Power Snapshot Depth from Differential Defocus-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ferreira_2026_CVPR, author = {Ferreira, Marcos A. and Li, Tianao and Mamish, John and Hester, Josiah and Sangar, Yaman and Guo, Qi and Alexander, Emma}, title = {SpiderCam: Low-Power Snapshot Depth from Differential Defocus}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41699-41709} }
Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schusterbauer_2026_CVPR, author = {Schusterbauer, Johannes and Gui, Ming and Li, Yusong and Ma, Pingchuan and Krause, Felix and Ommer, Bj\"orn}, title = {Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43260-43270} }
Towards Human-Like Robot Handwriting via Contour-Aware Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yutao and Dai, Gang and Zhang, Yifan and Han, Youwei and He, Qisheng and Huang, Shuangping}, title = {Towards Human-Like Robot Handwriting via Contour-Aware Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31597-31607} }
UniT: Unified Multimodal Chain-of-Thought Test-time Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Leon Liangyu and Ma, Haoyu and Fan, Zhipeng and Huang, Ziqi and Sinha, Animesh and Dai, Xiaoliang and Wang, Jialiang and He, Zecheng and Yang, Jianwei and Li, Chunyuan and Sun, Junzhe and Wang, Chu and Yeung-Levy, Serena and Juefei-Xu, Felix}, title = {UniT: Unified Multimodal Chain-of-Thought Test-time Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30257-30267} }
VoxTell: Free-Text Promptable Universal 3D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rokuss_2026_CVPR, author = {Rokuss, Maximilian and Langenberg, Moritz and Kirchhoff, Yannick and Isensee, Fabian and Hamm, Benjamin and Ulrich, Constantin and Regnery, Sebastian and Bauer, Lukas and Katsigiannopulos, Efthimios and Norajitra, Tobias and Maier-Hein, Klaus}, title = {VoxTell: Free-Text Promptable Universal 3D Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37538-37557} }
VGent: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Weitai and Kuen, Jason and Ren, Mengwei and Wei, Zijun and Yan, Yan and Liu, Kangning}, title = {VGent: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41160-41170} }
Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Hara_2026_CVPR, author = {Hara, Takayuki and Otsuka, Yuya}, title = {Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37236-37246} }
SMAP: Semantic Route Planning with Map-Grounded Multimodal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenjie and Yang, Chen and Lu, Xin and Wang, Zhen and Liu, Yue and Xi, Bobo and Zhang, Pengbo}, title = {SMAP: Semantic Route Planning with Map-Grounded Multimodal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40108-40118} }
ActivityForensics: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Peijun and Luo, Anwei and Pan, Gang and Kot, Alex C. and Jiang, Xudong}, title = {ActivityForensics: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42987-42996} }
Mitigating The Distribution Shift of Diffusion-based Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yue and Hu, Chenyu and An, Pengyu and Li, Yong-Lu}, title = {Mitigating The Distribution Shift of Diffusion-based Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33943-33952} }
Generative Video Compression with One-Dimensional Latent Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zihan and Jia, Zhaoyang and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Chen, Zhenghao and Li, Houqiang and Lu, Yan}, title = {Generative Video Compression with One-Dimensional Latent Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41256-41265} }
ExPose: Reinforcing Video Generation Models for Extreme Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Youngho and Cho, Wonjune and Ha, Hyunho and Kim, Sujung and Yoon, Kuk-Jin}, title = {ExPose: Reinforcing Video Generation Models for Extreme Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32636-32646} }
Multi-SpatialMLLM: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Runsen and Wang, Weiyao and Tang, Hao and Chen, Xingyu and Wang, Xiaodong and Chu, Fu-Jen and Feiszli, Matt and Liang, Kevin J.}, title = {Multi-SpatialMLLM: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31078-31088} }
DocSeeker: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Hao and Liu, Yuliang and Liu, Xingchen and Zhang, Yuyi and Liao, Minghui and Wu, Jihao and Chen, Wei and Bai, Xiang}, title = {DocSeeker: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41140-41149} }
DriverGaze360: OmniDirectional Driver Attention with Object-Level Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Govil_2026_CVPR, author = {Govil, Shreedhar and Stricker, Didier and Rambach, Jason}, title = {DriverGaze360: OmniDirectional Driver Attention with Object-Level Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39786-39795} }
FlashIn: Fast and Accurate Image Inversion for Real-time Image Editing-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Guangzhi}, title = {FlashIn: Fast and Accurate Image Inversion for Real-time Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30425-30434} }
Gloria: Consistent Character Video Generation via Content Anchors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuhang and Zhang, Fan and Pi, Huaijin and Zeng, Ailing and Guo, Shuai and Xu, Guowei and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun}, title = {Gloria: Consistent Character Video Generation via Content Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36724-36735} }
Action-Geometry Prediction with 3D Geometric Prior for Bimanual Manipulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Chongyang and Li, Haipeng and Cheng, Shen and Fan, Haoqiang and Feng, Ziliang and Liu, Shuaicheng}, title = {Action-Geometry Prediction with 3D Geometric Prior for Bimanual Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35036-35046} }
Revisiting 2D Foundation Models for Scalable 3D Medical Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Han and Georgescu, Bogdan and Zhang, Yanbo and Yoo, Youngjin and Baumgartner, Michael and Gao, Riqiang and Wang, Jianing and Zhao, Gengyan and Gibson, Eli and Comaniciu, Dorin and Grbic, Sasa}, title = {Revisiting 2D Foundation Models for Scalable 3D Medical Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30021-30031} }
UniGen-1.5: Enhancing Image Generation and Editing through Reward Unification in RL-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Rui and Gao, Mingfei and Gang, Haiming and Lu, Jiasen and Gan, Zhe and Yang, Yinfei and Wu, Zuxuan and Dehghan, Afshin}, title = {UniGen-1.5: Enhancing Image Generation and Editing through Reward Unification in RL}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29367-29378} }
Leveraging Verifier-Based Reinforcement Learning in Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Hanzhong and Wu, Jie and Liu, Jie and Gao, Yu and Ye, Zilyu and Yuan, Linxiao and Wang, Xionghui and Yu, Yizhou and Huang, Weilin}, title = {Leveraging Verifier-Based Reinforcement Learning in Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34343-34352} }
Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Taichun and Dong, Zhibin and Tan, Hao and Wang, Siwei and Liu, Xinwang and Zhu, En and Hu, Di and Liu, Tianrui and Li, Chuankun and He, Kunlun}, title = {Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39606-39616} }
InnoAds-Composer: Efficient Condition Composition for E-Commerce Poster Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yuxin and Cao, Ke and Liu, Haowei and Ma, Ao and Li, Fengheng and Zhu, Honghe and Zhang, Zheng and Ling, Run and Feng, Wei and He, Xuanhua and Zhang, Zhanjie and Guo, Zhen and Bian, Haoyi and Lv, Jingjing and Shen, Junjie and Law, Ching}, title = {InnoAds-Composer: Efficient Condition Composition for E-Commerce Poster Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32988-32999} }
Progressive Multi-cue Alignment for Unaligned RGBT Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Jiandong and Li, Chenglong and Feng, Hao and Lu, Andong and Huang, Lili and Tang, Jin}, title = {Progressive Multi-cue Alignment for Unaligned RGBT Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35207-35216} }
DynamicVGGT: Learning Dynamic Point Maps for 4D Scene Reconstruction in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Zhuolin and Li, Jing and Li, Guanghao and Chen, Xiaolei and Tang, Jiacheng and Zhang, Siyang and Jin, Zhounan and Cai, Feipeng and Li, Bin and Pu, Jian and Cai, Jia and Xue, Xiangyang}, title = {DynamicVGGT: Learning Dynamic Point Maps for 4D Scene Reconstruction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35670-35679} }
Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Yueming and Feng, Ruoyu and Dai, Qi and Wang, Yuqi and Lin, Wenfeng and Guo, Mingyu and Luo, Chong and Zheng, Nanning}, title = {Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43664-43674} }
MPL: Match-guided Prototype Learning for Few-shot Action Recognition-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Feng and Zhao, Jie and Luo, Fulin and Qin, Anyong and Song, Tiecheng and Zhao, Yue and Gao, Chenqiang and Han, Junwei}, title = {MPL: Match-guided Prototype Learning for Few-shot Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34439-34448} }
LUMINA: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Hongyi and Durak, Gorkem and Aktas, Halil Ertugrul and Bejar, Andrea M. and Tutun, Baver and Uysal, Emre and Bulbul, Ezgi and Dogan, Mehmet Fatih and Erok, Berrin and Yildirim, Berna Akkus and Erturk, Sukru Mehmet and Bagci, Ulas}, title = {LUMINA: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35301-35310} }
VerseCrafter: Dynamic Realistic Video World Model with 4D Geometric Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Sixiao and Yin, Minghao and Hu, Wenbo and Li, Xiaoyu and Shan, Ying and Fu, Yanwei}, title = {VerseCrafter: Dynamic Realistic Video World Model with 4D Geometric Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40277-40290} }
PhysSkin: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Yuanhang and Cheng, Tao and Li, Xingxuan and Zhao, Boming and Huang, Siyuan and Hu, Ruizhen and Chen, Peter Yichen and Bao, Hujun and Cui, Zhaopeng}, title = {PhysSkin: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32357-32366} }
Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency-
[pdf]
[supp]
[bibtex]@InProceedings{Gedik_2026_CVPR, author = {Gedik, Hakan Emre and Gupta, Shashank and Bovik, Alan}, title = {Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37507-37517} }
LRHDR: Learning Representation-enhanced HDR Video Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Chenzhuo and Chen, Xin and Li, Bingchen and Meng, Yu and Yue, Tao and Hu, Xuemei}, title = {LRHDR: Learning Representation-enhanced HDR Video Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41584-41593} }
Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Azeem_2026_CVPR, author = {Azeem, Abdullah and Wang, Ruisheng and Li, Qingquan and Siddique, Abubakar}, title = {Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34662-34672} }
Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Xiangyang and Li, Qingyu and Li, Yuming and Huang, Guanbo and Zhu, Yongjie and Qin, Wenyu and Wang, Meng and Wan, Pengfei and Huang, Shao-Lun}, title = {Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43440-43449} }
Diffusion Probe: Generated Image Result Prediction Using CNN Probes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Bukun and Cui, Benlei and Ye, Zhizeng and Dong, Xuemei and Chen, Tuo and Xue, Hui and Yang, Dingkang and Huang, Longtao and Hong, Haiwen and Tang, Jingqun}, title = {Diffusion Probe: Generated Image Result Prediction Using CNN Probes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35926-35935} }
Cross-modal Representation Learning for Diffusion-generated Image Detection-
[pdf]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Tao and Wang, Dayong and Chu, Qi and Liu, Bin and Yu, Nenghai}, title = {Cross-modal Representation Learning for Diffusion-generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36092-36102} }
Describe Anything Anywhere At Any Moment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gorlo_2026_CVPR, author = {Gorlo, Nicolas and Schmid, Lukas and Carlone, Luca}, title = {Describe Anything Anywhere At Any Moment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35002-35013} }
Mining Attribute Subspaces for Efficient Fine-tuning of 3D Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yu and Jiang, Hanwen and Abdelkader, Ahmed and Chu, Wen-Sheng and Feng, Brandon and Wang, Zhangyang and Huang, Qixing}, title = {Mining Attribute Subspaces for Efficient Fine-tuning of 3D Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29071-29080} }
Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Hongwei and Cai, Jiahang and Wang, Xun and Yang, Wenwu}, title = {Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42891-42900} }
Electromagnetic Inverse Scattering from a Single Transmitter-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yizhe and Tian, Chunxun and Wang, Haoru and Zhu, Wentao and Ma, Xiaoxuan and Wang, Yizhou}, title = {Electromagnetic Inverse Scattering from a Single Transmitter}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34040-34049} }
Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Dongxin and Huang, Yan and Xu, Yong and Ji, Hui}, title = {Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29919-29928} }
Exploring the Underwater World Segmentation without Extra Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bingyu and Huo, Tao and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong}, title = {Exploring the Underwater World Segmentation without Extra Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39879-39889} }
HOLO: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on SD Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Xuchang and Cao, Xu and Feng, Jinke and Fang, Hao}, title = {HOLO: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on SD Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41376-41385} }
OVOD-Agent: A Markov-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chujie and Lu, Jianyu and Luo, Zhiyuan and Chen, Xi and He, Chu}, title = {OVOD-Agent: A Markov-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41416-41425} }
CAST: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Xuanzuo and Zhang, Min and Liu, Daizong and Zuo, Zhiwen and Yang, Xun and Lin, Changting and Wang, Xun and Dong, Jianfeng}, title = {CAST: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38794-38803} }
EE-RL: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaolong and Yang, Lan and Li, Ruyang and Fang, Shan and Liu, Yang and Zhao, Xiangmo}, title = {EE-RL: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32082-32092} }
Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bozhao and Wu, Shaocong and Shao, Tong and Yang, Senqiao and Shan, Qiben and Tian, Zhuotao and Su, Jingyong}, title = {Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34617-34627} }
NeuROK: Generative 4D Neural Object Kinematics-
[pdf]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Chen and He, Guangzhao and Gao, Yue and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun}, title = {NeuROK: Generative 4D Neural Object Kinematics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39239-39251} }
Streaming Video Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Jiaer and Chen, Peixian and Zhang, Mengdan and Sun, Xing and Zhou, Kaiyang}, title = {Streaming Video Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31219-31229} }
Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiangling and Gao, Shuxuan and Liu, Bofan and Feng, Siqiang and Huang, Jirui and Chen, Yaxiong and Chen, Ziyu}, title = {Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35494-35503} }
OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Han and Peng, Xinyu and Wang, Yaoming and Peng, Zelin and Chen, Xin and Weng, Rongxiang and Wang, Jingang and Cai, Xunliang and Dai, Wenrui and Xiong, Hongkai}, title = {OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30235-30245} }
A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Tangzheng and Hu, Guanyu and Ren, Yijing and Kollias, Dimitrios and Celiktutan, Oya}, title = {A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31672-31682} }
Vista4D: Video Reshooting with 4D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Kuan Heng and Liu, Zhizheng and Salamanca, Pablo and Kant, Yash and Burgert, Ryan and Xu, Yuancheng and Namekata, Koichi and Zhao, Yiwei and Zhou, Bolei and Goldblum, Micah and Debevec, Paul and Yu, Ning}, title = {Vista4D: Video Reshooting with 4D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32671-32682} }
LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Yuwei and Zhao, Ganlong and Qin, Yipeng and Liu, Si and Liu, Yang and Lin, Liang and Li, Guanbin}, title = {LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32441-32450} }
Tracking through Severe Occlusion via Event-Derived Transient Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Hao and Liu, Yujin and Liu, Haoyue and Wang, Zhenyu and Peng, Shihan and Shi, Zhiwei and Chang, Yi and Yan, Luxin}, title = {Tracking through Severe Occlusion via Event-Derived Transient Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29526-29536} }
FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, You and Zhou, Dewei and Ma, Fan and Li, Fu and He, Dongliang and Yang, Yi}, title = {FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29254-29264} }
Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiahao and Yuan, Xiaohan and Wu, Xingchen and Xu, Chongyang and Li, Kun and Huang, Buzhen}, title = {Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38355-38365} }
Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Jinhu and Lou, Yihang and Si, Qingyi and Zhang, Shudong and Su, Sen}, title = {Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31693-31702} }
Hierarchically Robust Zero-shot Vision-language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Junhao and Zhang, Yifei and Zhu, Hao and Ong, Yew-Soon and Koniusz, Piotr}, title = {Hierarchically Robust Zero-shot Vision-language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37642-37652} }
CLEP: Contrastive Language-Pose Pretraining-
[pdf]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Sen and Wang, Huayu and Huang, Hsiang-Wei and An, Zhaochong and Hwang, Jenq-Neng and Zhang, Huaping and Li, Lei}, title = {CLEP: Contrastive Language-Pose Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30696-30706} }
AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Hongyi and Rahman, Mohammad Mahdinur and Dong, MingKang and Pu, Muxin and Aloqaily, Moayad and Li, Jie and Li, Xinfeng and Shen, Jialie and Qiu, Meikang and Wen, Qingsong}, title = {AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29285-29294} }
Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yi and Yang, Yu-Bin}, title = {Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41985-41995} }
NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zoomers_2026_CVPR, author = {Zoomers, Brent and Hahlbohm, Florian and Vanherck, Joni and Jorissen, Lode and Magnor, Marcus and Michiels, Nick}, title = {NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29824-29833} }
CADC: Content Adaptive Diffusion-Based Generative Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Xihua and Zhu, Lingyu and Zhang, Tianyu and Liu, Dong and Wang, Shiqi and Wang, Jing}, title = {CADC: Content Adaptive Diffusion-Based Generative Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32936-32946} }
ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers-
[pdf]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Qinghao and Chen, Bingzhi and Liu, Yishu and Lu, Minhua and Lu, Guangming}, title = {ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37674-37683} }
Hyperbolic Gramian Volumes for Multimodal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Na_2026_CVPR, author = {Na, Saiyang and Jiang, Feng and Zhou, Qifeng and Zhong, Wenliang and Dang, Thao M. and Guo, Yuzhi and Ma, Hehuan and Li, Chunyuan and An, Weizhi and Huang, Junzhou}, title = {Hyperbolic Gramian Volumes for Multimodal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37756-37765} }
ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Ai_2026_CVPR, author = {Ai, Jiaxin and Feng, Yukang and Zhang, Fanrui and Sun, Jianwen and Li, Zizhen and Li, Chuanhao and Chang, Yifan and Wu, Wenxiao and Wang, Ruoxi and Zhai, Mingliang and Zhang, Kaipeng}, title = {ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34586-34595} }
DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yanbin and Yan, Jiangyue and Kang, Chun and Chen, Yang and Liu, Hua and Kwok, James and Zhang, Yu}, title = {DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40822-40832} }
BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Changzhou and Ma, Wanlun and Tang, Xi and Hu, Kun and Wen, Sheng and Xiang, Yang}, title = {BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28828-28837} }
Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Pei and Ding, Junfeng and Yang, Jiaqi and Wang, Yulong and Ma, Jie and Nan, Liangliang}, title = {Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39042-39051} }
UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Chengyu and Chen, Jintao and Bai, Xiang and Chen, Yilong and She, Qi and Lu, Ming and Zhang, Shanghang}, title = {UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29750-29759} }
ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Youngeun and Zhang, Youjia and Liu, Huiling and Jung, Aecheon and Lee, Sunwoo and Hong, Sungeun}, title = {ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39572-39582} }
Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Sarker_2026_CVPR, author = {Sarker, Najibul Haque and Ibn Abdul Hakim, Zaber and Asgarov, Ali and Tang, Chia-Wei and Ishmam, Alvi Md and Thomas, Chris}, title = {Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34940-34949} }
Chain-of-Thought Guided Multi-Modal Object Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Ya and Li, Shihao and Liu, Zhaojun and Zheng, Aihua and Li, Chenglong and Tang, Jin}, title = {Chain-of-Thought Guided Multi-Modal Object Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37705-37714} }
MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Zhou, Boyao and Guo, Yu and Shen, Yujun and Wang, Fei}, title = {MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32399-32409} }
From Rays to Projections: Better Inputs for Feed-Forward View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zirui and Jiang, Zeren and Oswald, Martin R. and Song, Jie}, title = {From Rays to Projections: Better Inputs for Feed-Forward View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29012-29022} }
Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Ziyi and Wei, Zhipeng and Chen, Jingjing and Tan, Zhiyu and Li, Hao and Chen, Yi-Ping Phoebe}, title = {Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29244-29253} }
PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Jiacheng and Liu, Chunying and Guo, Hao and Wang, Ruohan and Gan, Hongping and Shi, Yilei}, title = {PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41616-41625} }
Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Rui and Shi, Bin and Sun, Kai and Dong, Bo}, title = {Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39659-39668} }
STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Huijie and Huang, Pengrui and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Qu, Liangqiong}, title = {STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33068-33077} }
VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Eskandar_2026_CVPR, author = {Eskandar, George and Shen, Fengyi and Altillawi, Mohammad and Chen, Dong and Bai, Yang and Yang, Liudi and Liu, Ziyuan}, title = {VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29620-29630} }
ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yiyang and Zhou, Feng and Yin, Xuedan and Cao, Pu and Dang, Yonghao and Yin, Jianqin}, title = {ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40612-40621} }
REACH: Explicit Recovery Behavior for Diffusion Policies-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Zundong and Chen, Junlin and Zhu, Jiayi and Xia, Kuanhao and Zhao, Boyi and Gu, Jiayuan}, title = {REACH: Explicit Recovery Behavior for Diffusion Policies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38498-38508} }
Tracking by Predicting 3-D Gaussians Over Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baranwal_2026_CVPR, author = {Baranwal, Tanish and Singh, Himanshu Gaurav and Rajasegaran, Jathushan and Malik, Jitendra}, title = {Tracking by Predicting 3-D Gaussians Over Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42527-42537} }
TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yihui and Lv, Chengxin and Tang, Zichen and Yang, Hongyu and Huang, Di}, title = {TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40886-40895} }
Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Xianchao and Zhou, Xinyu and Li, Youcheng and Shi, Jiayou and Li, Tianle and Chen, Liangming and Ren, Lei and Li, Yong-Lu}, title = {Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42386-42395} }
When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2026_CVPR, author = {Leng, Ye and Chu, Junjie and Li, Mingjie and Lin, Chenhao and Shen, Chao and Backes, Michael and Shen, Yun and Zhang, Yang}, title = {When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39372-39382} }
PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Di_2026_CVPR, author = {Di, Yan and Li, Yuheng and Wang, Yaoxing and Liu, Mengge and Gao, Shan and Ji, Xiangyang}, title = {PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30661-30674} }
Scaling View Synthesis Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Evan and Ryu, Hyunwoo and Mitchel, Thomas W. and Sitzmann, Vincent}, title = {Scaling View Synthesis Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28893-28902} }
Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jinglin and Li, Yi and Sun, Chuxiong and Xu, Xiao and Li, Jiangmeng and Xu, Fanjiang}, title = {Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30268-30277} }
Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Jiahao and Fu, Guanqiao and An, Wenbin and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian}, title = {Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41594-41604} }
No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Hai X. and Hoffmann, David T. and Guerrero, Ricardo and Martinez, Brais}, title = {No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33901-33910} }
Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xinyue and Liu, Jin and Wang, Hongbo and He, Ran and Huang, Huaibo}, title = {Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34270-34280} }
Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Ligong and Guo, Yeting and Chi, Haoang}, title = {Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35384-35393} }
OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Bin and Zheng, Sipeng and Luo, Hao and Li, Boyuan and Liu, Jing and Lu, Zongqing}, title = {OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30640-30649} }
DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jueqing and Qi, Yuanyuan and Yang, Xiaohao and Niu, Shuaicheng and Ke, Fucai and Zhou, Shujie and Tan, Wei and Lin, Jionghao and Buntine, Wray and Rezatofighi, Hamid and Du, Lan}, title = {DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39219-39229} }
Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minyoung E. and Yun, Dae Hee and Patel, Aditi V. and Hon, Madeline and Guan, Webster and Lee, Taegeon and Nguyen, Brian}, title = {Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35270-35279} }
Unified Personalized Understanding, Generating and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yu and Lin, Tianwei and Zhu, Ruike and Yuan, Yuqian and Zheng, Haoyu and Liang, Liang and Zhang, Wenqiao and Shao, Feifei and Li, Haoyuan and He, Wanggui and Jiang, Hao and Zhuang, Yueting}, title = {Unified Personalized Understanding, Generating and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29400-29409} }
OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yanqing and Li, Xianhang and Zhang, Letian and Wang, Zirui and Zheng, Zeyu and Zhou, Yuyin and Xie, Cihang}, title = {OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39164-39174} }
FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Muxi and Zhang, Zhaohua and Zhao, Chenchen and Chen, Mingyang and Jiang, Wenyu and Jiang, Tianwen and Zhuo, Jianhuan and Tang, Yu and Xiao, Qiuyong and Zhang, Jihong and Xu, Qiang}, title = {FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40782-40791} }
When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators-
[pdf]
[supp]
[bibtex]@InProceedings{Adamkiewicz_2026_CVPR, author = {Adamkiewicz, Krzysztof and Moser, Brian B. and Frolov, Stanislav and Nauen, Tobias Christian and Raue, Federico and Dengel, Andreas}, title = {When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36660-36669} }
Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Pengfei and Cao, Meng and Wang, Yingyao and Wang, Yi and Dong, Jiahua and Song, Jun and Cheng, Yu and Zheng, Bo and Liang, Xiaodan}, title = {Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36255-36266} }
RGB-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric RWKV Fusion Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiao and Wang, Haiyang and Wang, Shiao and Chen, Qiang and Jin, Jiandong and Song, Haoyu and Jiang, Bo and Li, Chenglong}, title = {RGB-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric RWKV Fusion Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32745-32755} }
R4: Retrieval-Augmented Reasoning for Vision-Language Models in 4D Spatio-Temporal Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sohn_2026_CVPR, author = {Sohn, Tin Stribor and Dillitzer, Maximilian and Corso, Jason J. and Sax, Eric}, title = {R4: Retrieval-Augmented Reasoning for Vision-Language Models in 4D Spatio-Temporal Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38711-38721} }
AERGS-SLAM: Auto-Exposure-Robust Stereo 3D Gaussian Splatting SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhiyu and Hui, Feng and Liu, Yu}, title = {AERGS-SLAM: Auto-Exposure-Robust Stereo 3D Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40929-40938} }
Photo3D: Advancing Photorealistic 3D Generation through Structure-Aligned Detail Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xinyue and Ma, Zhiyuan and Sun, Lingchen and Guo, Yanjun and Zhang, Lei}, title = {Photo3D: Advancing Photorealistic 3D Generation through Structure-Aligned Detail Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34237-34247} }
LiREC-Net: A Target-Free and Learning-Based Network for LiDAR, RGB, and Event Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dash_2026_CVPR, author = {Dash, Aditya Ranjan and Battrawy, Ramy and Schuster, Ren\'e and Stricker, Didier}, title = {LiREC-Net: A Target-Free and Learning-Based Network for LiDAR, RGB, and Event Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31421-31429} }
Drainage: A Unifying Framework for Addressing Class Uncertainty-
[pdf]
[supp]
[bibtex]@InProceedings{Taha_2026_CVPR, author = {Taha, Yasser and Montavon, Gr\'egoire and K\"orber, Nils}, title = {Drainage: A Unifying Framework for Addressing Class Uncertainty}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41934-41943} }
Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Minxing and Fan, Linlong and Wang, Qiushi and Wu, Ge and Luo, Yiyan and Yu, Yuhang and Chen, Jinwei and Wang, Yaxing and Fan, Qingnan and Yang, Jian}, title = {Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30553-30563} }
Evo-Retriever: LLM-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiqing and Guo, Jinyue and Wang, Yaqi and Xiao, Haiyang and Zhang, Yuewei and Liu, Guohua and Wang, Hao Henry}, title = {Evo-Retriever: LLM-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31113-31123} }
G-MIXER: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Jiyoung and Yang, Heejae and Lee, Jee-Hyong}, title = {G-MIXER: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33891-33900} }
Reconstructing CLIP for Open-Vocabulary Dense Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yajie and Zhang, Jinjin and Liu, Qingjie and Huang, Di}, title = {Reconstructing CLIP for Open-Vocabulary Dense Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39208-39218} }
Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based 3D Human Motion Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Gyojin and Kim, Junmo}, title = {Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based 3D Human Motion Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30618-30628} }
VIRD: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Juhye and Lee, Wooju and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, Dongwan and Myung, Hyun}, title = {VIRD: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33693-33702} }
TimeBridge: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qin and Morrison, Abigail and Scharr, Hanno and Krajsek, Kai}, title = {TimeBridge: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39647-39658} }
Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haoji and Gu, Xin and Li, Jiawen and Ma, Chixiang and Bai, Sule and Zhang, Chubin and Zhang, Bowen and Zhou, Zhichao and He, Dongliang and Tang, Yansong}, title = {Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32903-32914} }
High-Fidelity Mobile Avatars with Pruned Local Blendshapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Youyi and Wang, He and Shao, Tianjia and Zhou, Kun}, title = {High-Fidelity Mobile Avatars with Pruned Local Blendshapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32345-32356} }
Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Boyuan and Jiang, Richard}, title = {Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41944-41953} }
ShapeAR: Generating Editable Shape Layers via Autoregressive Diffusion-
[pdf]
[bibtex]@InProceedings{Chakraborty_2026_CVPR, author = {Chakraborty, Souymodip and Singh, Ankur and Singh, Amit Vikram and Batra, Vineet and Phogat, Ankit}, title = {ShapeAR: Generating Editable Shape Layers via Autoregressive Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40664-40673} }
Towards Stable Federated Continual Test-Time Adaptation in Wild World-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Liwen and Dong, Xingbo and Liao, Iman Yi and Jin, Zhe}, title = {Towards Stable Federated Continual Test-Time Adaptation in Wild World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29451-29461} }
Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Fengyang and Hu, Peng and Xu, Lei and Guo, XingE and Qin, Guanyi and Shen, Yuqi and Fang, Chengyu and Zhang, Rihan and He, Chunming and Farsiu, Sina}, title = {Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29897-29908} }
Hybrid Token Compression for Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jusheng and Guo, Xiaoyang and Cai, Kaitong and Lv, Qinhan and Fan, Yijia and Chai, Wenhao and Wang, Jian and Wang, Keze}, title = {Hybrid Token Compression for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31889-31899} }
Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Hao and Luo, Tingjin}, title = {Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40866-40875} }
Parameter-Efficient Adaptation for MLLMs via Implicit Modality Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingfang and Wang, Yunhong and Wang, Lu and Chen, Jiaxin}, title = {Parameter-Efficient Adaptation for MLLMs via Implicit Modality Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37745-37755} }
Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weize and Du, Yunhao and Yin, Qixiang and Zhao, Zhicheng and Su, Fei}, title = {Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42549-42559} }
StreamDiT: Real-Time Streaming Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kodaira_2026_CVPR, author = {Kodaira, Akio and Hou, Tingbo and Hou, Ji and Georgopoulos, Markos and Juefei-Xu, Felix and Tomizuka, Masayoshi and Zhao, Yue}, title = {StreamDiT: Real-Time Streaming Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29200-29210} }
Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ci and Ding, Zhaojun and Yang, Chence and Liu, Jun and Zhai, Xiaoming and Huang, Shaoyi and Li, Beiwen and Ma, Xiaolong and Lu, Jin and Yuan, Geng}, title = {Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35872-35881} }
Efficient Unrolled Networks for Large-Scale 3D Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vo_2026_CVPR, author = {Vo, Romain and Tachella, Juli\'an}, title = {Efficient Unrolled Networks for Large-Scale 3D Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36343-36353} }
See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuoshuo and Zhang, Yizhen and Fu, Jingjing and Song, Lei and Bian, Jiang and Yang, Yujiu and Wang, Rui}, title = {See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33499-33509} }
Mesh-Pro: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhen and Liu, Jian and Lei, Biwen and Xu, Jing and Weng, Haohan and Zhu, Yiling and Chen, Zhuo and Fan, Junfeng and Ma, Yunkai and Du, Dazhao and Guo, Song and Jing, Fengshui and Guo, Chunchao}, title = {Mesh-Pro: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34248-34258} }
Causality in Video Diffusers is Separable from Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Xingjian and He, Guande and Li, Zhengqi and Shechtman, Eli and Huang, Xun and Wu, Zongze}, title = {Causality in Video Diffusers is Separable from Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43373-43384} }
X-PCR: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Gui and Zhong, Zehao and Zhou, YongSong and Li, Yudong and Wu, Ende and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin}, title = {X-PCR: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33110-33120} }
Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yunkai and Zhang, Yudong and Zhang, Kunquan and Zhang, Jinxiao and Chen, Xinying and Fu, Haohuan and Dong, Runmin}, title = {Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42147-42157} }
Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuelin and Cheng, Sijie and Li, Chen and Li, Zongzhao and Huang, Yuxin and Liu, Yang and Huang, Wenbing}, title = {Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41150-41159} }
SRA 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Mengmeng and Jiang, Dengyang and Li, Liuzhuozheng and Lin, Yucheng and Shen, Guojiang and Kong, Xiangjie and Liu, Yong and Dai, Guang and Wang, Jingdong}, title = {SRA 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32978-32987} }
InvCoSS: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Zihao and Rui, Shaohao and Tang, Zhenyu and Wang, Guotai and Wang, Xiaosong}, title = {InvCoSS: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42626-42636} }
Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Shawn and Price, Brian and Fan, Yifei and Morse, Bryan}, title = {Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38564-38573} }
AToken: A Unified Tokenizer for Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jiasen and Song, Liangchen and Xu, Mingze and Ahn, Byeongjoo and Wang, Yanjun and Chen, Chen and Dehghan, Afshin and Yang, Yinfei}, title = {AToken: A Unified Tokenizer for Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28701-28711} }
Urban-GS: A Unified 3D Gaussian Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Meng and Xia, Changqun and Wang, Yuze and Wang, Junyi and Duan, Wantong and Xie, Xinxiong and Qi, Yue}, title = {Urban-GS: A Unified 3D Gaussian Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33207-33216} }
Bridging RGB and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huan_2026_CVPR, author = {Huan, Zihan and Pan, Xipeng and Zhang, Hualong and Feng, Siyang and Lan, Rushi and Wang, Huadeng and Lu, Haoxiang and Liu, Zhenbing}, title = {Bridging RGB and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37518-37527} }
Masking Matters: Unlocking the Spatial Reasoning Capabilities of LLMs for 3D Scene-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2026_CVPR, author = {Jeon, Yerim and Lee, Miso and Moon, WonJun and Heo, Jae-Pil}, title = {Masking Matters: Unlocking the Spatial Reasoning Capabilities of LLMs for 3D Scene-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38668-38677} }
PriVi: Towards a General-Purpose Video Model for Primate Behavior in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mueller_2026_CVPR, author = {Mueller, Felix B. and Meier, Jan F. and Lueddecke, Timo and Vogg, Richard and Freixanet, Roger L. and Hassler, Valentin and Bosshard, Tiffany and Karakoc, Elif and O'Hearn, William J. and Pereira, Sofia M. and Sehner, Sandro and Wierucka, Kaja and Burkart, Judith and Fichtel, Claudia and Fischer, Julia and Gail, Alexander and Hobaiter, Catherine and Ostner, Julia and Samuni, Liran and Sch\"ulke, Oliver and Shahidi, Neda and Wessling, Erin G. and Ecker, Alexander S.}, title = {PriVi: Towards a General-Purpose Video Model for Primate Behavior in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38804-38815} }
CHIPS: Efficient CLIP Adaptation via Curvature-aware Hybrid Influence-based Data Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Xinlin and Li, Yichen and Liu, Xiwei and Yang, Haolin and Lu, Yifan and Zou, Ziyun and Li, Yulong and Li, Huifa and Chen, Dongliang and Wang, Qinglei and Liu, Weiyang and Qian, Ying and Shi, Jiangming and Razzak, Imran}, title = {CHIPS: Efficient CLIP Adaptation via Curvature-aware Hybrid Influence-based Data Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29483-29493} }
DiP: Taming Diffusion Models in Pixel Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhennan and Zhu, Junwei and Chen, Xu and Zhang, Jiangning and Hu, Xiaobin and Zhao, Hanzhen and Wang, Chengjie and Yang, Jian and Tai, Ying}, title = {DiP: Taming Diffusion Models in Pixel Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36136-36146} }
ReasonX: MLLM-Guided Intrinsic Image Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dirik_2026_CVPR, author = {Dirik, Alara and Wang, Tuanfeng Yang and Ceylan, Duygu and Zafeiriou, Stefanos and Fr\"uhst\"uck, Anna}, title = {ReasonX: MLLM-Guided Intrinsic Image Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30802-30812} }
ReLaX: Reasoning with Latent Exploration for Large Reasoning Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shimin and Chen, Xianwei and Shen, Yufan and Ye, Ziyuan and Wu, Jibin}, title = {ReLaX: Reasoning with Latent Exploration for Large Reasoning Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33761-33771} }
BrickNet: Graph-Backed Generative Brick Assembly-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kulits_2026_CVPR, author = {Kulits, Peter and Schmid, Cordelia}, title = {BrickNet: Graph-Backed Generative Brick Assembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39252-39261} }
Diffusion Mental Averages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thawatdamrongkit_2026_CVPR, author = {Thawatdamrongkit, Phonphrm and Seripanitkarn, Sukit and Suwajanakorn, Supasorn}, title = {Diffusion Mental Averages}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35713-35725} }
SFR-Net: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhao-Min and Huang, Xinjian and Ge, Yisu and Li, Yu}, title = {SFR-Net: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41636-41645} }
LumiMotion: Improving Gaussian Relighting with Scene Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Kaleta_2026_CVPR, author = {Kaleta, Joanna and W\'ojcik, Piotr and Marzol, Kacper and Trzcinski, Tomasz and Kania, Kacper and Kowalski, Marek}, title = {LumiMotion: Improving Gaussian Relighting with Scene Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37311-37321} }
D2Dewarp: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Heng and Wu, Xiangping and Chen, Qingcai}, title = {D2Dewarp: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34734-34744} }
SegMoTE: Token-Level Mixture of Experts for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yujie and Li, Jingwen and Ju, Sibo and Su, Yanzhou and Yao, He and Liu, Yisong and Zhu, Min and Cheng, Junlong}, title = {SegMoTE: Token-Level Mixture of Experts for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36332-36342} }
STAR: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Yiwei and Wan, Hui and Luo, Xiao and Deng, Minghua}, title = {STAR: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31482-31492} }
Semantic Context Matters: Improving Conditioning for Autoregressive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Dongyang and Xu, Ryan and Zeng, Jianhao and Lan, Rui and Bai, Yancheng and Sun, Lei and Chu, Xiangxiang}, title = {Semantic Context Matters: Improving Conditioning for Autoregressive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30402-30413} }
LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shu_2026_CVPR, author = {Shu, Zhijian and Lin, Cheng and Xie, Tao and Yin, Wei and Li, Ben and Pu, Zhiyuan and Li, Weize and Yao, Yao and Cao, Xun and Guo, Xiaoyang and Long, Xiao-Xiao}, title = {LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36422-36432} }
Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Jun and Miao, Hongyi and Zhou, Yingjie and Zhou, Wangqiu and Zhang, Jianbo and Cao, Linhan and Zhu, Dandan and Yang, Hua and Min, Xiongkuo and Sun, Wei and Zhai, Guangtao}, title = {Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30120-30129} }
Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Haozhen and Ji, Xiaozhong and Liu, Yuansen and Wu, Wenbin and Yan, Xiaoxiao and Liu, Jingjing and Wu, Kai and Pan, Jiazhen and Jian, Bailiang and Zhang, Jiangning and Hu, Xiaobin and Li, Hongwei Bran}, title = {Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41224-41234} }
Improving Sparse Autoencoder with Dynamic Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dongsheng and Zhang, Jinsen and Su, Dawei and Huang, Hui}, title = {Improving Sparse Autoencoder with Dynamic Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41996-42006} }
SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Qianxun and Song, Chenxi and Cai, Yujun and Zhang, Chi}, title = {SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29136-29145} }
Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2026_CVPR, author = {Ryu, Hyeonggon and Chung, Joon Son and Harwath, David}, title = {Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38606-38615} }
OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herzog_2026_CVPR, author = {Herzog, Henry and Bastani, Favyen and Zhang, Yawen and Tseng, Gabriel and Redmon, Joseph and Sablon, Hadrien and Park, Ryan and Morrison, Jacob and Buraczynski, Alexandra and Farley, Karen and Hansen, Josh and Howe, Andrew and Johnson, Patrick Alan and Otterlee, Mark and Schmitt, Ted and Pitelka, Hunter and Daspit, Stephen and Ratner, Rachel and Wilhelm, Christopher and Wood, Sebastian and Jacobi, Mike and Kerner, Hannah and Shelhamer, Evan and Farhadi, Ali and Krishna, Ranjay and Beukema, Patrick}, title = {OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34806-34817} }
OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zhihao and Wang, Cheng and Liu, Shengyuan and Liang, Zhiying and Ye, Zanting and Ju, Min Jie and Woo, Peter YM and Yuan, Yixuan}, title = {OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42732-42743} }
Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuxin and Zhou, Yinan and Chen, Yuxin and Zhang, Ziqi and Ma, Zongyang and Yuan, Chunfeng and Li, Bing and Gao, Jun and Hu, Weiming}, title = {Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31155-31165} }
UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Walmer_2026_CVPR, author = {Walmer, Matthew and Suri, Saksham and Aggarwal, Anirud and Shrivastava, Abhinav}, title = {UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41288-41298} }
Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Jinge and Zhu, Fengqing}, title = {Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32299-32308} }
Global-Aware Edge Prioritization for Pose Graph Initialization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Tong and Tolias, Giorgos and Matas, Jiri and Barath, Daniel}, title = {Global-Aware Edge Prioritization for Pose Graph Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28642-28651} }
Geometry-driven OOD Detectors Are Class-Incremental Learners-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Wangwang and Gao, Zijian and Wan, Tianjiao and Cao, Yuan and Dou, Yong and Xu, Kele}, title = {Geometry-driven OOD Detectors Are Class-Incremental Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34638-34649} }
Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yingkai and Zhang, Tao and Nie, Jing and Fu, Ying}, title = {Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41573-41583} }
Saliency-Driven Token Merging for Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Weiying and Chen, Xiaoyu and Zhang, Xin and Hao, Chenhe and Ma, Jitao and Li, Yunsong and Fang, Leyuan}, title = {Saliency-Driven Token Merging for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32184-32193} }
A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, SuYeon and Lee, Wongyu and Cho, MyeongAh}, title = {A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33036-33045} }
AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Shuhan and Li, Peipei and Liu, Xuannan and Zhang, Dongsen and Guo, Xinyu and Li, Zekun}, title = {AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35416-35426} }
Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Mishra_2026_CVPR, author = {Mishra, Khushboo and Trivedi, Varun and Dutta, Tanima}, title = {Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29421-29430} }
What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, David and Raistrick, Alexander and Deng, Jia}, title = {What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34171-34180} }
GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhenya and Liu, Zhe and Lu, Yuxiang and Hou, Liping and Miao, Chenxuan and Peng, Siyi and Feng, Bailan and Bai, Xiang and Zhao, Hengshuang}, title = {GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35680-35690} }
Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xinwang and Li, Xiuxing and Li, Qing and Zhuang, Ziyue and Wu, Yutong and Li, Ziyu and Wang, Zhuo and Li, Kai and Hao, Jianye and Wu, Xia}, title = {Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41235-41244} }
Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Fiebelman_2026_CVPR, author = {Fiebelman, Gal and Averbuch-Elor, Hadar and Benaim, Sagie}, title = {Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37322-37331} }
Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jaejin and Jeong, Minjae and Park, Joonhyuk and Hwang, Yechan and Baek, Seunghun and Kim, Won Hwa}, title = {Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39393-39402} }
3D Gaussian Splatting from Unposed Spike Stream-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yijia and Hu, Tong and Hu, Liwen and Ma, Lei and Huang, Tiejun}, title = {3D Gaussian Splatting from Unposed Spike Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41002-41011} }
WaTeRFlow: Watermark Temporal Robustness via Flow Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Utae and In, Sumin and Ryu, Hyunju and Choi, Jaewan and Yang, Feng and Jeong, Jongheon and Kim, Seungryong and Kim, Sangpil}, title = {WaTeRFlow: Watermark Temporal Robustness via Flow Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31703-31713} }
Post-training Feature Pruning for Fundus Images Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Van-Nguyen and Le, Duc-Tai and Bum, Junghyun and Choo, Hyunseung}, title = {Post-training Feature Pruning for Fundus Images Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37590-37599} }
LoL: Longer than Longer, Scaling Video Generation to Hour-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Justin and Wu, Jie and Li, Ming and Yang, Tao and Li, Xiaojie and Wang, Rui and Bai, Andrew and Ban, Yuanhao and Hsieh, Cho-Jui}, title = {LoL: Longer than Longer, Scaling Video Generation to Hour}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38132-38142} }
Markovian Scale Prediction: A New Era of Visual Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yu and Liu, Jingyi and Shi, Yiwei and Zhang, Qi and Miao, Duoqian and Wang, Changwei and Cao, Longbing}, title = {Markovian Scale Prediction: A New Era of Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41266-41277} }
See Through the Noise: Improving Domain Generalization in Gaze Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Yanming and Wang, Shijing and Huang, Yaping and Tian, Yi}, title = {See Through the Noise: Improving Domain Generalization in Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31346-31355} }
RAAS: LLM Agentic System Architecture Search with GRPO-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiayi and Wan, Guancheng and Zhang, Man and Ye, Mang}, title = {RAAS: LLM Agentic System Architecture Search with GRPO}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34470-34479} }
VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Longteng and Zheng, DanDan and Qiao, Qianqian and Huang, Heng and Wang, Huaye and Bo, Yihang and Peng, Bao and Chen, Jingdong and Zhou, Jun and Jin, Xin}, title = {VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30457-30466} }
GROW: Watermark Generation with Progressive Guidance for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Pengcheng and Jia, Zexi and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie}, title = {GROW: Watermark Generation with Progressive Guidance for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35978-35987} }
Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuqing and Ma, Chuofan and Lin, Zhijie and Teng, Yao and Yu, Lijun and Wang, Shuai and Han, Jiaming and Feng, Jiashi and Jiang, Yi and Liu, Xihui}, title = {Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36072-36081} }
Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Mengmeng and Isobe, Takashi and Jia, Xu and Sun, Yanan and Yang, Zetong and Wang, Weinong and Zhou, Dong and Li, Dong and Lu, Huchuan and Barsoum, Emad}, title = {Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43145-43154} }
RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahmadi_2026_CVPR, author = {Ahmadi, Ehsan and Schofield, Hunter and Khamidehi, Behzad and Arasteh, Fazel and Shan, Jinjun and Mou, Lili and Bai, Dongfeng and Rezaee, Kasra}, title = {RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39734-39743} }
E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Meiqi and Li, Mingyu and Zhu, Junxiong}, title = {E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30941-30951} }
Progressive Mask Distillation for Self-supervised Video Representation-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Kewei and Liang, Chong and Xie, Zhao and Guo, Dan}, title = {Progressive Mask Distillation for Self-supervised Video Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41677-41687} }
Computational Speckle Pattern Interferometry-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shengxi and Yang, Sophia and Chan, Dorian and O'Toole, Matthew}, title = {Computational Speckle Pattern Interferometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41710-41719} }
OS-Fed: One Snapshot Is All You Need-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Xuwei and Zhang, Jinghui and Tan, Yuchuan and Huang, Wenbo and Wu, Zhen and Zhou, Shen and Gao, LiSha and Ding, Ding and Dong, Fang}, title = {OS-Fed: One Snapshot Is All You Need}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31758-31768} }
Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Kaichao and Wang, He and Hao, Xiaoshuai and Yang, Xiulong and Liu, Ajian and Chu, Qi and Diao, Yunfeng and Hong, Richang}, title = {Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42310-42320} }
FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Lei and Yin, Hubery and Liu, Gongye and Lv, Zhengyao and Guo, Jingcai and Li, Chen and Luo, Wenhan and Yang, Yujiu and Lyu, Jing}, title = {FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30381-30390} }
Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zirui and Chu, Xianhang and Li, Jiahao and Yang, Xu and Deng, Cheng}, title = {Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39901-39910} }
Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Mincheol and Lee, Minseung and Choi, Seonga and Choi, Miso and Oh, Kyeongjin and Lee, Hyunyoung and Park, Cheonyoung and Song, Yongho and Park, Seunghyun and Kim, Jinkyu}, title = {Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31900-31909} }
UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zihan and Zhou, Liangtai and Chen, Dian and Tang, Ni and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun}, title = {UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37465-37475} }
Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Shaowu and Jia, Xibin and Fan, Chao and Gao, Junyu and Chang, Jing and Sun, Qianmei}, title = {Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31186-31195} }
Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ruiying and Liang, Yuanzhi and Huang, Haibin and Yu, Tianshu and Zhang, Chi}, title = {Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34408-34417} }
STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junyang and Dong, Jiangxin and Sun, Long and Yang, Yixin and Pan, Jinshan}, title = {STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38281-38290} }
OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2026_CVPR, author = {Cha, Junuk and Kim, Jihyeon and Park, Han-Mu}, title = {OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30707-30717} }
TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Rong and Wang, Runqi and Zhang, Yingjun and Tao, Tao and Li, Xiaomeng and Jing, Liping}, title = {TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35597-35606} }
SounDiT: Geo-Contextual Soundscape-to-Landscape Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Junbo and Tan, Haofeng and Liao, Bowen and Jiang, Albert and Fei, Teng and Huang, Qixing and Zhou, Bing and Tu, Zhengzhong and Ye, Shan and Kang, Yuhao}, title = {SounDiT: Geo-Contextual Soundscape-to-Landscape Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32659-32670} }
EXOTIC: External Vision-driven Incomplete Multi-view Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Shilin and Peng, Dezhong and Ren, Zhenwen and Sun, Yuan}, title = {EXOTIC: External Vision-driven Incomplete Multi-view Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30216-30225} }
FINER: MLLMs Hallucinate under Fine-grained Negative Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Rui and Kim, Sanghwan and Xian, Yongqin and Akata, Zeynep and Alaniz, Stephan}, title = {FINER: MLLMs Hallucinate under Fine-grained Negative Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36235-36244} }
MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification-
[pdf]
[supp]
[bibtex]@InProceedings{Kwak_2026_CVPR, author = {Kwak, Sangwoon and Kwon, Weeyoung and Jeong, Jun Young and Kim, Geonho and Cheong, Won-Sik and Oh, Jihyong}, title = {MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37370-37379} }
CoV-Align: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hengqi and Zhou, Wanting and Kong, Longteng and Feng, Fangxiang and Ren, Lei and Chen, Wei and Wang, Xiaojie}, title = {CoV-Align: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36828-36837} }
Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamao_2026_CVPR, author = {Yamao, Sosuke and Miyahara, Natsuki and Qi, Yuankai and Takeuchi, Shun}, title = {Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32850-32859} }
A Difference-in-Difference Approach to Detecting AI-Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Xinyi and Ye, Kai and Shi, Chengchun and Yang, Ying and Zhu, Jin and Zhou, Hongyi}, title = {A Difference-in-Difference Approach to Detecting AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42964-42975} }
CompetitorFormer: Mitigating Query Conflicts for 3D Instance Segmentation via Competitive Strategy-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Duanchu and Yang, Junjie and Gong, Haoran and Liu, Jing and Wang, Di}, title = {CompetitorFormer: Mitigating Query Conflicts for 3D Instance Segmentation via Competitive Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34724-34733} }
Ego-Grounding for Personalized Question-Answering in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Junbin and Zhang, Shenglang and Zhu, Pengxiang and Yao, Angela}, title = {Ego-Grounding for Personalized Question-Answering in Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40537-40547} }
MERIT: Multi-domain Efficient RAW Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Wenjun and Fu, Shenghao and Jin, Yian and Ni, Yang and Cui, Ziteng and Chen, Hanning and He, Yirui and Liu, Yezi and Yun, Sanggeon and Jeong, SungHeon and Masukawa, Ryozo and Chung, William Youngwoo and Imani, Mohsen}, title = {MERIT: Multi-domain Efficient RAW Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37216-37225} }
FeatureFool: Zero-Query Fooling of Video Models via Feature Map-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Duoxun and Xiao, Xi and Hu, Guangwu and Sun, Kangkang and Yang, Xiao and Chen, Dongyang and Li, Qing and Yin, Yong-jie and Wang, Jiyao}, title = {FeatureFool: Zero-Query Fooling of Video Models via Feature Map}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42268-42279} }
A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Shuang and Ren, Pengfei and Sun, Haifeng and Pan, Ting and Qi, Qi and Zhang, Lei and Liu, Cong and Liao, Jianxin and Wang, Jingyu}, title = {A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38324-38334} }
StyleGallery: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Boyu and Ye, Yunfan and Liu, Chang and Wu, Weishang and Liu, Fang and Cai, Zhiping}, title = {StyleGallery: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29092-29102} }
HOG-Layout: Hierarchical 3D Scene Generation, Optimization and Editing via Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haiyan and Zhang, Deyu and Weng, Dongdong and Song, Weitao and Duh, Henry Been-Lirn}, title = {HOG-Layout: Hierarchical 3D Scene Generation, Optimization and Editing via Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31586-31596} }
SeD-UD: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qin and Zhang, Wenbo and Liu, Limei and Peng, Han and Yang, Junfeng and Xu, Guanying}, title = {SeD-UD: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30346-30356} }
FAAR: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fontana_2026_CVPR, author = {Fontana, Maxime and Spratling, Michael and Shi, Miaojing}, title = {FAAR: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31135-31144} }
GeoWorld: Geometric World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zeyu and Li, Danning and Reid, Ian and Hartley, Richard}, title = {GeoWorld: Geometric World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30952-30963} }
IFCSR: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Back_2026_CVPR, author = {Back, Jonghee and Kim, Jongju and Kim, Jeong-Uk and Kim, Eunjin and Jeon, Minyong}, title = {IFCSR: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38187-38197} }
4D Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Jiaxun and Fan, Wentao and Amayri, Manar and Bouguila, Nizar}, title = {4D Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31440-31449} }
Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with 2D Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhumei and Hu, Zechen and Guo, Ruoxi and Pi, Huaijin and Feng, Ziyong and Zhang, Liang and Pei, Mingtao and Huang, Siyuan}, title = {Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with 2D Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42869-42878} }
LayoutAD: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Zhichao and Zhang, Jiasheng and Sun, Jiyun and Cui, Jiangtao and Qiao, Xiaotian}, title = {LayoutAD: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35567-35576} }
Few-Step Diffusion Sampling Through Instance-Aware Discretizations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Liangyu and Wang, Ruoyu and Zhao, Tong and Fu, Dingwen and Lei, Mingkun and Zhu, Beier and Zhang, Chi}, title = {Few-Step Diffusion Sampling Through Instance-Aware Discretizations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35882-35892} }
Revisiting 3D Reconstruction Kernels as Low-Pass Filters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shengjun and Chen, Min and Wei, Yibo and Dong, Mingyu and Duan, Yueqi}, title = {Revisiting 3D Reconstruction Kernels as Low-Pass Filters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33374-33383} }
Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR, author = {Seo, Soo Won and Lee, KyungChae and Cho, Hyungchan and Son, Taein and Cho, Nam Ik and Choi, Jun Won}, title = {Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40418-40427} }
SpatialVID: A Large-Scale Video Dataset with Spatial Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Yuan, Yufeng and Zheng, Rujie and Lin, Youtian and Gao, Jian and Chen, Lin-Zhuo and Bao, Yajie and Zeng, Chang and Zhou, Yanxi and Long, Xiao-Xiao and Zhu, Hao and Zhang, Zhaoxiang and Cao, Xun and Yao, Yao}, title = {SpatialVID: A Large-Scale Video Dataset with Spatial Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42592-42603} }
ReasonMap: Towards Fine-Grained Visual Reasoning from Transit Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Sicheng and Wang, Song and Ouyang, Shuyi and Kong, Lingdong and Song, Zikai and Zhu, Jianke and Wang, Huan and Wang, Xinchao}, title = {ReasonMap: Towards Fine-Grained Visual Reasoning from Transit Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41077-41088} }
3D sans 3D Scans: Scalable Pre-training from Video-Generated Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamada_2026_CVPR, author = {Yamada, Ryousuke and Ide, Kohsuke and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Puy, Gilles and Bursuc, Andrei and Asano, Yuki M.}, title = {3D sans 3D Scans: Scalable Pre-training from Video-Generated Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39075-39085} }
DK-DDIL: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yuxi and Liu, Sujie and Yang, Jing and Wang, Jiacheng and Chen, Yiping and Magnier, Baptiste and Wang, Liansheng}, title = {DK-DDIL: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36290-36299} }
MUST: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Kyungwon and Hwang, Dosik}, title = {MUST: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30312-30321} }
Diff-SemiER: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Yin, Shiqi and Lian, Zhenxiang and Guo, Jingtao}, title = {Diff-SemiER: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30813-30822} }
LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu}, title = {LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36433-36443} }
Hierarchical Codec Diffusion for Video-to-Speech Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Jiaxin and Cong, Gaoxiang and Wang, Chenhui and Wen, Xin-Cheng and Li, Zhaoyang and Cao, Boyuan and Shan, Hongming}, title = {Hierarchical Codec Diffusion for Video-to-Speech Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43352-43362} }
SO-Bench: A Structural Output Evaluation of Multimodal LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Di and Ma, Kaixin and Nan, Feng and Chen, Haofeng and Zhai, Bohan and Griffiths, David and Gao, Mingfei and Gan, Zhe and Verma, Eshan and Yang, Yinfei and Chen, Zhifeng and Dehghan, Afshin}, title = {SO-Bench: A Structural Output Evaluation of Multimodal LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37694-37704} }
Real-Time Neural Video Compression with Unified Intra and Inter Coding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Hui and Bian, Yifan and Li, Li and Wu, Jingran and Zhang, Xianguo and Liu, Dong}, title = {Real-Time Neural Video Compression with Unified Intra and Inter Coding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35217-35226} }
RebRL: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mu and Ma, Tianren and Liu, Yunfan and Hu, Kun and Ye, Qixiang}, title = {RebRL: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43135-43144} }
Task-Aware Image Signal Processor for Advanced Visual Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Kai and Xiao, Jin and Zhang, Leheng and Shi, Kexuan and Gu, Shuhang}, title = {Task-Aware Image Signal Processor for Advanced Visual Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33672-33681} }
Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Beichen and Zang, Yuhang and Dong, Xiaoyi and Cao, Yuhang and Duan, Haodong and Lin, Dahua and Wang, Jiaqi}, title = {Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41203-41212} }
UniLight: A Unified Representation for Lighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zitian and Georgiev, Iliyan and Fischer, Michael and Hold-Geoffroy, Yannick and Lalonde, Jean-Francois and Deschaintre, Valentin}, title = {UniLight: A Unified Representation for Lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29684-29694} }
MaskDexGrasp: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Binghui and Zhou, Lin and Xu, Haoxuan and Yan, Jianan and Yu, Zhipeng and Liu, Zekai and Wang, Yangang}, title = {MaskDexGrasp: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29598-29609} }
Infinity-RoPE: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yesiltepe_2026_CVPR, author = {Yesiltepe, Hidir and Meral, Tuna and Akan, Adil Kaan and Oktay, Kaan and Yanardag, Pinar}, title = {Infinity-RoPE: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40256-40265} }
Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Xiaolong and Jiang, Qi and Sun, Lei and Yu, Zongxi and Yang, Kailun and Wu, Peixuan and Zhou, Jiacheng and Gao, Yao and Ma, Yaoguang and Yang, Ming-Hsuan and Wang, Kaiwei}, title = {Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33995-34005} }
UnicEdit-10M: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Keming and Huang, Zhipeng and Fu, Canmiao and Liu, Qingyang and Cai, Jiani and Lv, Zheqi and Li, Chen and LYU, Jing and Zhao, Zhou and Zhang, Shengyu}, title = {UnicEdit-10M: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37279-37289} }
U^2Flow: Uncertainty-Aware Unsupervised Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xunpei and Lin, Wenwei and Chang, Yi and Chen, Gang}, title = {{U$^2$Flow}: Uncertainty-Aware Unsupervised Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28690-28700} }
Back to Basics: Let Denoising Generative Models Denoise-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tianhong and He, Kaiming}, title = {Back to Basics: Let Denoising Generative Models Denoise}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36115-36125} }
PvP: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Mingqi and Yu, Tao and Song, Haolin and Li, Bo and Jin, Xin and Chen, Hua and Zeng, Wenjun}, title = {PvP: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42375-42385} }
When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Minhyeok}, title = {When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34546-34554} }
CoWTracker: Tracking by Warping instead of Correlation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Zihang and Insafutdinov, Eldar and Sucar, Edgar and Vedaldi, Andrea}, title = {CoWTracker: Tracking by Warping instead of Correlation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42571-42580} }
Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chengzhi and Huang, Heyan and Jian, Ping and Yang, Zhen and Tian, Yaning and Guo, Zhongbin}, title = {Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31812-31821} }
Rascene: High-Fidelity 3D Scene Imaging with mmWave Communication Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Kunzhe and Zhou, Geo Jie and Liu, Xiaoming and Zeng, Huacheng}, title = {Rascene: High-Fidelity 3D Scene Imaging with mmWave Communication Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36454-36463} }
Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Daikun and Wang, Teng and Sun, Changyin}, title = {Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29504-29513} }
SDUIE: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control-
[pdf]
[supp]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Xiaofeng and Zhang, Yu-Xin and Shen, Hao and Jin, Yeying and Hou, Junming and Gui, Jie}, title = {SDUIE: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37423-37433} }
From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Zhao, Xilin and Huang, Qingming}, title = {From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31250-31261} }
Transition Models: Rethinking the Generative Learning Objective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zidong and Zhang, Yiyuan and Yue, Xiaoyu and Yue, Xiangyu and Li, Yangguang and Ouyang, Wanli and Bai, Lei}, title = {Transition Models: Rethinking the Generative Learning Objective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29178-29189} }
M3DLayout: A Multi-Source Dataset of 3D Indoor Layouts and Structured Descriptions for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yiheng and Cai, Zhuojiang and Wang, Mingdao and Guo, Meitong and Li, Tianxiao and Lin, Li and Wang, Yuwang}, title = {M3DLayout: A Multi-Source Dataset of 3D Indoor Layouts and Structured Descriptions for 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34217-34226} }
MM-OVSeg: Multimodal Optical-SAR Fusion for Open-Vocabulary Segmentation in Remote Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yimin and Xiao, Aoran and Chen, Hongruixuan and Xia, Junshi and Yokoya, Naoto}, title = {MM-OVSeg: Multimodal Optical-SAR Fusion for Open-Vocabulary Segmentation in Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42202-42212} }
Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Shuwei and Zhu, Kejin and Ma, Shixing and Du, Xinzhe and Zhang, Baochang and Min, Zhe}, title = {Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34126-34137} }
GGPT: Geometry-Grounded Point Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yutong and Wang, Yiming and Zhang, Xucong and Prokudin, Sergey and Tang, Siyu}, title = {GGPT: Geometry-Grounded Point Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28959-28968} }
Bulk RNA-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Hang and Yang, Ruocheng and You, Wenjie and Huang, Zhilin and Zhang, Daoqiang and Shao, Wei}, title = {Bulk RNA-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41815-41825} }
GEM-TFL: Bridging Weak and Full Supervision for Forgery Localization through EM-Guided Decomposition and Temporal Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xiaodong and Zheng, Yuanming and Wang, Suting and Yang, Junqi and Yang, Yuhong and Tu, Weiping and Wang, Zhongyuan}, title = {GEM-TFL: Bridging Weak and Full Supervision for Forgery Localization through EM-Guided Decomposition and Temporal Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42932-42941} }
VLM-Loc: Localization in Point Cloud Maps via Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Shuhao and Liao, Youqi and Wang, Peijie and Liao, Wenlong and Zhang, Qilin and Busam, Benjamin and Chen, Xieyuanli and Liu, Yun}, title = {VLM-Loc: Localization in Point Cloud Maps via Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41365-41375} }
Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shaolin and Li, Yuying and Zhong, Lei and Li, Shigang and Li, Jianfeng}, title = {Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38198-38207} }
From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Kun and Sun, Min and Chen, Zhen and Lozano, Alejandro and He, Xiangteng and Li, Shi and Navab, Nassir and Sun, Xiaoxiao and Padoy, Nicolas and Yeung-Levy, Serena}, title = {From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42649-42658} }
Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Tianci and Pan, Haohao and Wang, Jinpeng and Lian, Niu and Chen, Xinrui and Chen, Bin and Xia, Shu-Tao and Yuan, Chun}, title = {Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38774-38783} }
OralGPT-Omni: A Versatile Dental Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Jing and Liang, Yuci and Lin, Lizhuo and Fan, Yuxuan and Zhou, Wenkai and Guo, Kaixin and Ye, Zanting and Sun, Yanpeng and Zhang, Xinyu and Yang, Yanqi and Li, Qiankun and Tang, Hao and Tsoi, James Kit-Hon and Shen, Linlin and Hung, Kuo Feng}, title = {OralGPT-Omni: A Versatile Dental Multimodal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38509-38519} }
Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jiaqi and Shi, Juntong and Li, Puheng and Ye, Haotian and Guo, Qiushan and Ermon, Stefano}, title = {Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43320-43330} }
Progressive Neural Architecture Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Caiyang and Huang, Chen and Liu, Yun and Tang, Chenwei and Ju, Wei and Lv, Jiancheng}, title = {Progressive Neural Architecture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34524-34534} }
EDGS: Eliminating Densification for Efficient Convergence of 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kotovenko_2026_CVPR, author = {Kotovenko, Dmytro and Grebenkova, Olga and Ommer, Bj{\"o}rn}, title = {EDGS: Eliminating Densification for Efficient Convergence of 3DGS}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41065-41076} }
CubeComposer: Spatio-Temporal Autoregressive 4K 360deg Video Generation from Perspective Video-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Lingen and Wang, Guangzhi and Li, Xiaoyu and Zhang, Zhaoyang and Dou, Qi and Gu, Jinwei and Xue, Tianfan and Shan, Ying}, title = {CubeComposer: Spatio-Temporal Autoregressive 4K 360deg Video Generation from Perspective Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32625-32635} }
Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hubbertz_2026_CVPR, author = {Hubbertz, Michael and Han, Qi and Meisen, Tobias}, title = {Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39755-39764} }
TR2M: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Beilei and Huang, Yiming and Bai, Long and Ren, Hongliang}, title = {TR2M: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34181-34192} }
ReManNet: A Riemannian Manifold Network for Monocular 3D Lane Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Chengzhi and Li, Bijun}, title = {ReManNet: A Riemannian Manifold Network for Monocular 3D Lane Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33046-33056} }
VideoAuto-R1: Video Auto Reasoning via Thinking Once, Answering Twice-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shuming and Zhuge, Mingchen and Zhao, Changsheng and Chen, Jun and Wu, Lemeng and Liu, Zechun and Zhu, Chenchen and Cai, Zhipeng and Zhou, Chong and Liu, Haozhe and Chang, Ernie and Suri, Saksham and Xu, Hongyu and Qian, Qi and Wen, Wei and Varadarajan, Balakrishnan and Liu, Zhuang and Xu, Hu and Bordes, Florian and Krishnamoorthi, Raghuraman and Ghanem, Bernard and Chandra, Vikas and Xiong, Yunyang}, title = {VideoAuto-R1: Video Auto Reasoning via Thinking Once, Answering Twice}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32829-32839} }
Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Lilin and Guo, Yimo and Li, Yue and Shi, Jiancheng and Liu, Xianggen}, title = {Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34900-34907} }
Parallax to Align Them All: An OmniParallax Attention Mechanism for Distributed Multi-View Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haotian and Long, Feiyue and Yu, Yixin and Xue, Jian and Tang, Haocheng and Xu, Tongda and Shi, Zhenning and Wang, Yan and Ma, Siwei and Zhang, Jiaqi}, title = {Parallax to Align Them All: An OmniParallax Attention Mechanism for Distributed Multi-View Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41322-41331} }
Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Chenxi and Zhu, Chen and Feng, Xiaokun and Hao, Aiming and Zhu, Jiashu and Lei, Jiachen and Wu, Jiahong and Chu, Xiangxiang and Yang, Jufeng}, title = {Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36649-36659} }
VidPrism: Heterogeneous Mixture of Experts for Image-to-Video Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Rui and Wang, Chuanming and Ma, Huadong}, title = {VidPrism: Heterogeneous Mixture of Experts for Image-to-Video Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31230-31239} }
GeoAgent: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Modi and Zhang, Yiming and Sun, Boyuan and Zhang, Dingwen and Cheng, Ming-Ming and Hou, Qibin}, title = {GeoAgent: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41352-41364} }
ViHOI: Human-Object Interaction Synthesis with Visual Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Songjin and Zhong, Linjie and Guo, Ling and Ding, Changxing}, title = {ViHOI: Human-Object Interaction Synthesis with Visual Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30686-30695} }
FILTR: Extracting Topological Features from Pretrained 3D Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Martinez_2026_CVPR, author = {Martinez, Louis and Ovsjanikov, Maks}, title = {FILTR: Extracting Topological Features from Pretrained 3D Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36179-36189} }
MSGNav: Unleashing the Power of Multi-modal 3D Scene Graph for Zero-Shot Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xun and Zhao, Shijia and Wang, Yunxiang and Lu, Xin and Zhang, Wanfa and Qu, Rongsheng and Li, Weixin and Wang, Yunhong and Wen, Chenglu}, title = {MSGNav: Unleashing the Power of Multi-modal 3D Scene Graph for Zero-Shot Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37154-37163} }
Subspace Alignment for CLIP-based Continual Learning via Canonical Correlation Analysis-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Huan and Dong, Shuyu and Zheng, Yujin and Wang, Dingwen and Fan, Shenghua and Lyu, Fan}, title = {Subspace Alignment for CLIP-based Continual Learning via Canonical Correlation Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32257-32266} }
From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Feifan and Chen, Hongyang}, title = {From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31377-31388} }
Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zehan and Li, Yaoyi and Zhang, Neng and Cai, Jia}, title = {Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39796-39805} }
TC-Pade: Trajectory-Consistent Pade Approximation for Diffusion Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Shaoxuan and Cui, Benlei and Huang, Bukun and Ye, Zhizeng and Sun, Yunyun and Huang, Longtao and Xue, Hui and Yang, Yang and Hong, Haiwen and Tang, Jingqun and Zhao, Zhou}, title = {TC-Pade: Trajectory-Consistent Pade Approximation for Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35768-35778} }
Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Xinqi and Liu, Yihao and Wang, Dong and Xiao, Bin}, title = {Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34961-34970} }
StreamReady: Learning What to Answer and When in Long Streaming Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Azad_2026_CVPR, author = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh S}, title = {StreamReady: Learning What to Answer and When in Long Streaming Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40494-40504} }
V-Attack: Targeting Disentangled Value Features for Controllable Adversarial Attacks on LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Sen and Zhang, Jie and Yan, Jianxin and Shan, Shiguang and Chen, Xilin}, title = {V-Attack: Targeting Disentangled Value Features for Controllable Adversarial Attacks on LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42257-42267} }
3D Space as a Scratchpad for Editable Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saha_2026_CVPR, author = {Saha, Oindrila and Krs, Vojtech and Mech, Radomir and Maji, Subhransu and Gadelha, Matheus and Blackburn-Matzen, Kevin}, title = {3D Space as a Scratchpad for Editable Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29233-29243} }
Confusion-Aware Spectral Regularizer for Long-Tailed Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Ziquan and Jin, Gaojie and Zhu, Hanruo and Lu, Si-Yuan and Zhang, Yunxiao and Fu, Zeyu and Mu, Ronghui and Zhang, Guoqiang and Sun, Zhao and Xia, Yuhang and Shang, Jiaxing and Li, Xiang and Liu, Lu and Huang, Tianjin}, title = {Confusion-Aware Spectral Regularizer for Long-Tailed Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28712-28722} }
Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Linge and Chen, Yingying and Zhu, Bingke and Zhou, Lu and Wang, Jinqiao}, title = {Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32005-32014} }
PhysInOne: Visual Physics Learning and Reasoning in One Suite-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Siyuan and Wang, Hejun and Cheng, Hu and Li, Jinxi and Wang, Dongsheng and Jiang, Junwei and Jin, Yixiao and Huang, Jiayue and Mao, Shiwei and Liu, Shangjia and Yang, Yafei and Song, Hongkang and Wei, Shenxing and Zhang, Zihui and Wang, Bing and Wang, Zhihua and Zou, Chuhang and Yang, Bo}, title = {PhysInOne: Visual Physics Learning and Reasoning in One Suite}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33131-33142} }
Omni-Attack: Adversarial Attacks on Open-Ended VQA in Black-Box Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Kai and Yu, Weichen and Zhang, Li and Robey, Alexander and Zou, Andy and Hu, Haoqi and Xu, Chengming and Fredrikson, Matt}, title = {Omni-Attack: Adversarial Attacks on Open-Ended VQA in Black-Box Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42341-42351} }
From Intuition to Investigation: A Tool-Augmented Reasoning MLLM Framework for Generalizable Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haoyuan and Wang, Keyao and Zhang, Guosheng and Yue, Haixiao and Tan, Zhiwen and Peng, Siran and Zhang, Tianshuo and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Liu, Ajian and Zhu, Xiangyu and Lei, Zhen}, title = {From Intuition to Investigation: A Tool-Augmented Reasoning MLLM Framework for Generalizable Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40855-40865} }
Learning 3D Reconstruction with Priors in Test Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Lei and Wu, Haoyu and Dave, Akshat and Samaras, Dimitris}, title = {Learning 3D Reconstruction with Priors in Test Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36550-36560} }
EgoXtreme: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Taegyoon and Han, Yegyu and Ji, Seojin and Park, Jaewoo and Kim, Sojeong and Kwon, Taein and Kim, Hyung-Sin}, title = {EgoXtreme: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40696-40706} }
Hyper-PCN: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Linfei and Tan, Pei and Li, Siqi and Zou, Changqing and Gao, Yue}, title = {Hyper-PCN: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39121-39130} }
FISHuman: Fine-grained Single-image 3D Human Reconstruction via Multi-view 4D Remeshing-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui}, title = {FISHuman: Fine-grained Single-image 3D Human Reconstruction via Multi-view 4D Remeshing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42765-42776} }
Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zaijing and Hu, Bing and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Xie, Pengwei and Hao, Jianye and Nie, Liqiang}, title = {Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35135-35145} }
CoRiM: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Shihao and Wei, Wei}, title = {CoRiM: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37821-37830} }
StreamVLO: Streaming Visual-LiDAR Odometry with Cumulative Drift Compensation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Mengmeng and Liu, Jiuming and Yang, Michael Ying and Jiang, Chaokang and Li, Jiangtao and Zhang, Yunpeng and Wang, Hesheng and Nex, Francesco and Cheng, Hao}, title = {StreamVLO: Streaming Visual-LiDAR Odometry with Cumulative Drift Compensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39086-39097} }
WhisperNet: A Scalable Solution for Bandwidth-Efficient Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Gong and Zhang, Chaokun and Zhao, Xinyan}, title = {WhisperNet: A Scalable Solution for Bandwidth-Efficient Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32154-32163} }
DeX-Portrait: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yuxiang and Li, Zhe and Wang, Yanwen and Zhu, Hao and Cao, Xun and Liu, Ligang}, title = {DeX-Portrait: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40009-40019} }
MultiModalPFN: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Wall and Song, Chaeyoung and Kim, Hanul}, title = {MultiModalPFN: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30357-30367} }
Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Bing and Wang, Xiaoli and Lu, Gui-Fu and Li, Zechao}, title = {Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39617-39626} }
Causal Motion Diffusion Models for Autoregressive Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Qing and Watanabe, Akihisa and Fujiwara, Kent}, title = {Causal Motion Diffusion Models for Autoregressive Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38366-38375} }
GeoSemba: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xutao and Li, Jiarui and Liu, Junwen and Ren, Yonggong}, title = {GeoSemba: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29990-29999} }
GeCo-SRT: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Wenbo and Xia, Wenke and Zhang, Weitao and Hu, Di}, title = {GeCo-SRT: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42408-42417} }
Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Kao_2026_CVPR, author = {Kao, Chia-Hsiang and Huynh, Cong Phuoc and Wang, Chien-Yi and Vesdapunt, Noranart and Stojanov, Stefan and Hariharan, Bharath and Obiednikov, Oleksandr and Zhou, Ning}, title = {Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42364-42374} }
Specificity-aware reinforcement learning for fine-grained open-world classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Angheben_2026_CVPR, author = {Angheben, Samuele and Berasi, Davide and Conti, Alessandro and Ricci, Elisa and Wang, Yiming}, title = {Specificity-aware reinforcement learning for fine-grained open-world classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41467-41477} }
Forensic-Friendly Image Manipulation via Controllable Latent Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hanyu and Wu, Haiwei and Tian, Jinyu and Li, Jianqing and Zhou, Jiantao}, title = {Forensic-Friendly Image Manipulation via Controllable Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35394-35404} }
Thinking with Programming Vision: Towards a Unified View for Thinking with Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Zirun and Hong, Minjie and Zhang, Feng and Jia, Kai and Jin, Tao}, title = {Thinking with Programming Vision: Towards a Unified View for Thinking with Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33467-33476} }
Fast Reasoning Segmentation for Images and Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Yiqing and Unberath, Mathias}, title = {Fast Reasoning Segmentation for Images and Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34765-34774} }
Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jiayang and Wang, Pin and Wang, Hongbo and Liu, Xinyue and Huang, Huaibo and He, Ran}, title = {Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43155-43164} }
MoRGS: Efficient Per-Gaussian Motion Reasoning for Streamable Dynamic 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Wonjoon and Woo, Sungmin and Kim, Donghyeong and Lee, Jungho and Park, Sangheon and Lee, Sangyoun}, title = {MoRGS: Efficient Per-Gaussian Motion Reasoning for Streamable Dynamic 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41044-41053} }
TAR: Token-Aware Refinement for Fine-grained Generalized Category Discovery-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xingyu and Zhang, Yu and Mi, Siya and Wei, Xiu-Shen}, title = {TAR: Token-Aware Refinement for Fine-grained Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31995-32004} }
Geometrically-Constrained Agent for Spatial Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zeren and Lu, Xiaoya and Zheng, Zhijie and Li, Pengrui and He, Lehan and Zhou, Yijin and Shao, Jing and Zhuang, Bohan and Sheng, Lu}, title = {Geometrically-Constrained Agent for Spatial Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38689-38699} }
More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaowen and Li, Jing and Huo, Hongtao and Cao, Haozhe and Wang, Renhua and Dong, Xu}, title = {More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41510-41520} }
CaricHarmony: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dongyu and Chen, Dar-Yen and Song, Yi-Zhe}, title = {CaricHarmony: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36126-36135} }
PhyCritic: Multimodal Critic Models for Physical AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Tianyi and Wang, Shihao and Liu, Guilin and Dong, Yi and Li, Ming and Huang, Heng and Kautz, Jan and Yu, Zhiding}, title = {PhyCritic: Multimodal Critic Models for Physical AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36881-36892} }
Scaling Parallel Sequence Models to Vision Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yitong and McCarthy, Collin and Wang, Hongjun and Ye, Hanrong and Dou, Qi and Xue, Tianfan and Gu, Jinwei and Kautz, Jan and Yin, Hongxu and Molchanov, Pavlo and Liu, Sifei}, title = {Scaling Parallel Sequence Models to Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41332-41341} }
IPR-1: Interactive Physical Reasoner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingyu and Zhuo, Lifeng and Tan, Tianxi and Xie, Guocan and Nie, Xian and Li, Yan and Zhao, Renjie and He, Zizhu and Wang, Ziyu and Cai, Jiting and Li, Yong-Lu}, title = {IPR-1: Interactive Physical Reasoner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33415-33425} }
Making Training-Free Diffusion Segmentors Scale with the Generative Power-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Benyuan and Xu, Qianqian and Wang, Zitai and Cao, Xiaochun and Huang, Longtao and Huang, Qingming}, title = {Making Training-Free Diffusion Segmentors Scale with the Generative Power}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35861-35871} }
GS-CLIP: Zero-shot 3D Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Zehao and Liu, An and Wang, Yan}, title = {GS-CLIP: Zero-shot 3D Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35587-35596} }
VGG-T$^3$: Offline Feed-Forward 3D Reconstruction at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elflein_2026_CVPR, author = {Elflein, Sven and Li, Ruilong and Agostinho, S{\'e}rgio and Gojcic, Zan and Leal-Taix{\'e}, Laura and Zhou, Qunjie and Osep, Aljosa}, title = {VGG-T{$^3$}: Offline Feed-Forward 3D Reconstruction at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36464-36474} }
SketchRevive: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal LLMs-
[pdf]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Ran and Hu, Haoxiang and Pei, Chenxi and Liu, Yanxuan and Qiang, Wenwen and Liu, Fang and Deng, Xiaoming and Ma, Cuixia and Liu, Yong-Jin}, title = {SketchRevive: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43165-43174} }
Emergent Extreme-View Geometry in 3D Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yiwen and Tung, Joseph and Cai, Ruojin and Fouhey, David and Averbuch-Elor, Hadar}, title = {Emergent Extreme-View Geometry in 3D Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36411-36421} }
Rethinking BCE Loss for Multi-Label Image Recognition with Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Ao and Jiang, Zhiwei and Cheng, Zifeng and Wang, Cong and Yin, Yafeng and Yang, Shufan and Gu, Qing}, title = {Rethinking BCE Loss for Multi-Label Image Recognition with Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38784-38793} }
SE(3)-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Sheng and Zhai, Di-Hua and Xia, Yuanqing}, title = {SE(3)-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35114-35123} }
InstructMix2Mix: Consistent Sparse-View Editing Through Multi-View Model Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gilo_2026_CVPR, author = {Gilo, Daniel and Litany, Or}, title = {InstructMix2Mix: Consistent Sparse-View Editing Through Multi-View Model Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29000-29011} }
SEA: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jiho and Choi, Sieun and Seo, Jaeyoon and Sohn, Minho and Kim, Yeana and Kim, Jihie}, title = {SEA: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31652-31661} }
AdaSpark: Adaptive Sparsity for Efficient Long-Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Handong and Liu, Zikang and Guo, Longteng and Yue, Tongtian and Tang, Yepeng and Zhu, Xinxin and Zheng, Chuanyang and Wang, Ziming and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Liu, Jing}, title = {AdaSpark: Adaptive Sparsity for Efficient Long-Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40548-40558} }
DreamingComics: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Patrick and Chen, Chen}, title = {DreamingComics: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36769-36780} }
DeAR: Fine-Grained VLM Adaptation by Decomposing Attention Head Roles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yiming and Yang, Hongkun and Wang, Lionel Z. and Chen, Bin and Xian, Weizhi and Teng, Jianzhi}, title = {DeAR: Fine-Grained VLM Adaptation by Decomposing Attention Head Roles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31514-31523} }
ELITE: Efficient Gaussian Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Youwang_2026_CVPR, author = {Youwang, Kim and Hyoseok, Lee and Subin, Park and Pons-Moll, Gerard and Oh, Tae-Hyun}, title = {ELITE: Efficient Gaussian Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40152-40162} }
ParaUni: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Jiangtong and Liu, Lin and Huang, Jie and Zhang, Xiaopeng and Tian, Qi and Zhao, Feng}, title = {ParaUni: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41836-41846} }
Decoding 3D Perception via BrainSSD: Synergistic Fusion of EEG Representations from Static and Dynamic Visual Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Yincheng and Shi, Enze and Zhang, Shu}, title = {Decoding 3D Perception via BrainSSD: Synergistic Fusion of EEG Representations from Static and Dynamic Visual Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42711-42721} }
Ov3R: Open-Vocabulary Semantic 3D Reconstruction from RGB Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Ziren and Li, Xiaohan and Tosi, Fabio and Han, Jiawei and Mattoccia, Stefano and Cai, Jianfei and Poggi, Matteo}, title = {Ov3R: Open-Vocabulary Semantic 3D Reconstruction from RGB Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34206-34216} }
GeoRelight: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Yuxuan and Liang, Ruofan and Zakharov, Egor and Bagautdinov, Timur and Cao, Chen and Nam, Giljoo and Saito, Shunsuke and Pons-Moll, Gerard and Romero, Javier}, title = {GeoRelight: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29771-29780} }
MotionCrafter: Dense Geometry and Motion Reconstruction with a 4D VAE-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ruijie and Lu, Jiahao and Hu, Wenbo and Han, Xiaoguang and Cai, Jianfei and Shan, Ying and Zheng, Chuanxia},
  title     = {{MotionCrafter}: Dense Geometry and Motion Reconstruction with a {4D} {VAE}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40303--40315}
}
cryoSENSE: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein Cryo-EM Image Manifold-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shabeeb_2026_CVPR,
  author    = {Shabeeb, Zain and Saeedi, Daniel and Tsui, Darin and Jamali, Vida and Aghazadeh, Amirali},
  title     = {{cryoSENSE}: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein {Cryo-EM} Image Manifold},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34072--34083}
}
4DEquine: Disentangling Motion and Appearance for 4D Equine Reconstruction from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Jin and An, Liang and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying},
  title     = {{4DEquine}: Disentangling Motion and Appearance for {4D} Equine Reconstruction from Monocular Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32496--32506}
}
Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yidan and Wang, Zongheng and Xing, Hongjie and Li, Chunguo and Liu, Xiaoxiao},
  title     = {Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37864--37873}
}
VectorArk: Learning Practical Image Vectorization with Rounded Polygon Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Gehlaut_2026_CVPR,
  author    = {Gehlaut, Tarun and Liu, Difan and Bansal, Charu and Malani, Krutik and Chakraborty, Souymodip and Phogat, Ankit and Fisher, Matthew and Batra, Vineet},
  title     = {{VectorArk}: Learning Practical Image Vectorization with Rounded Polygon Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31619--31627}
}
DriveCombo: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Enhui and Zhang, Jiahuan and Zheng, Guantian and Tang, Tao and Li, Shengbo Eben and Lu, Yuhang and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Hao, Zhihui and Lang, Xianpeng and Yu, Kaicheng},
  title     = {{DriveCombo}: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32113--32123}
}
SAVE: Speech-Aware Video Representation Learning for Video-Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruixiang and Xu, Zhihao and Lan, Bangxiang and Xin, Zijie and Liu, Jingyu and Li, Xirong},
  title     = {{SAVE}: Speech-Aware Video Representation Learning for Video-Text Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31166--31175}
}
Time Blindness: Why Video-Language Models Can't See What Humans Can?-
[pdf]
[supp]
[bibtex]@InProceedings{Upadhyay_2026_CVPR,
  author    = {Upadhyay, Ujjwal and Ranjan, Mukul and Shen, Zhiqiang and Elhoseiny, Mohamed},
  title     = {Time Blindness: Why Video-Language Models Can't See What Humans Can?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30906--30918}
}
Stepwise Credit Assignment for GRPO on Flow-Matching Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Savani_2026_CVPR,
  author    = {Savani, Yash and Kveton, Branislav and Liu, Yuchen and Wang, Yilin and Shi, Jing and Mukherjee, Subhojyoti and Vlassis, Nikos and Singh, Krishna Kumar},
  title     = {Stepwise Credit Assignment for {GRPO} on Flow-Matching Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42007--42017}
}
BAgger: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Po_2026_CVPR,
  author    = {Po, Ryan and Chan, Eric Ryan and Chen, Changan and Wetzstein, Gordon},
  title     = {{BAgger}: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43727--43739}
}
Image-to-Point Cloud Feature Back-Projection for Multimodal Training of 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiawei and Poggi, Matteo and Huan, Li and Wang, Changshuo and Liu, Kaiqi and Li, Wei},
  title     = {Image-to-Point Cloud Feature Back-Projection for Multimodal Training of {3D} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42049--42060}
}
Improved Mean Flows: On the Challenges of Fastforward Generative Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Zhengyang and Lu, Yiyang and Wu, Zongze and Shechtman, Eli and Kolter, J. Zico and He, Kaiming},
  title     = {Improved Mean Flows: On the Challenges of Fastforward Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30467--30476}
}
POLAR: A Portrait OLAT Dataset and Generative Framework for Illumination-Aware Face Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuo and Yang, Chengqun and Su, Zhuo and Lv, Zheng and Gao, Jingnan and Zhang, Xiaoyuan and Yang, Xiaokang and Yan, Yichao},
  title     = {{POLAR}: A Portrait {OLAT} Dataset and Generative Framework for Illumination-Aware Face Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28871--28881}
}
2D-LFM: Lifting Foundation Model without 3D Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Dabhi_2026_CVPR,
  author    = {Dabhi, Mosam and Gill, Irhas and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon},
  title     = {{2D-LFM}: Lifting Foundation Model without {3D} Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34303--34311}
}
Bi-Bridge: Bidirectional Diffusion Bridges for Low-Light Image Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Hua_2026_CVPR,
  author    = {Hua, Zeyu and Li, Hui and Wang, Yu and Wang, Song and Zhu, Congchao and Zheng, Caixia},
  title     = {{Bi-Bridge}: Bidirectional Diffusion Bridges for Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37455--37464}
}
StaMo: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingyu and Shu, Jiuhe and Chen, Hui and Li, Zeju and Zhao, Canyu and Yang, Jiange and Gao, Shenyuan and Chen, Hao and Shen, Chunhua},
  title     = {{StaMo}: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35014--35024}
}
Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in OOD Detection with VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhikang and Xu, Qianqian and Wang, Zitai and Hua, Cong and Li, Sicong and Yang, Zhiyong and Huang, Qingming},
  title     = {Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in {OOD} Detection with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34650--34661}
}
DualPrim: Compact 3D Reconstruction with Positive and Negative Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Xiaoxu and Chen, Zhongmin and Yang, Bo and Chen, Weikai and Liu, Weixiao and Gao, Lin},
  title     = {{DualPrim}: Compact {3D} Reconstruction with Positive and Negative Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29081--29091}
}
TagSplat: Topology-Aware Gaussian Splatting for Dynamic Mesh Modeling and Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhi and Weng, Dongdong and Su, Mo and Chen, Yixiao and Dongye, Xiaonuo and Xu, Chenyu},
  title     = {{TagSplat}: Topology-Aware {Gaussian} Splatting for Dynamic Mesh Modeling and Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40971--40980}
}
Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juxin and Shi, Haoyu and Wang, Mengyao and Zhang, Huaiwen},
  title     = {Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39403--39413}
}
CURVE: A Benchmark for Cultural and Multilingual Long Video Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Darshan and Nagrani, Arsha and Manikantan, Kawshik and Singh, Harman and Tewari, Dinesh and Weyand, Tobias and Schmid, Cordelia and Angelova, Anelia and Dave, Shachi},
  title     = {{CURVE}: A Benchmark for Cultural and Multilingual Long Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32860--32871}
}
Proxy-Tuning: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yi and Qian, Shengju and Zhu, Lingting and Liu, Lei and Qiao, Wandi and Li, Ziqiang and Yu, Lequan and Li, Bin},
  title     = {{Proxy-Tuning}: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43331--43340}
}
PropFly: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Wonyong and Moon, Jaeho and Lee, Jaehyup and Kim, Soo Ye and Kim, Munchurl},
  title     = {{PropFly}: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43228--43238}
}
LocateAnything3D: Vision-Language 3D Detection with Chain-of-Sight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2026_CVPR,
  author    = {Man, Yunze and Wang, Shihao and Zhang, Guowen and Bjorck, Johan and Gui, Liang-Yan and Fan, Jim and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding},
  title     = {{LocateAnything3D}: Vision-Language {3D} Detection with Chain-of-Sight},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31089--31102}
}
ViBES: A Conversational Agent with Behaviorally-Intelligent 3D Virtual Body-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Juze and Chen, Changan and Chen, Xin and Yu, Heng and Xiang, Tiange and Khan, Ali Sartaz and Lakshmikanth, Shrinidhi K. and Adeli, Ehsan},
  title     = {{ViBES}: A Conversational Agent with Behaviorally-Intelligent {3D} Virtual Body},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39994--40008}
}
Image-based Outlier Synthesis With Training Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Regmi_2026_CVPR,
  author    = {Regmi, Sudarshan},
  title     = {Image-based Outlier Synthesis With Training Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39338--39350}
}
Thermal-Det: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranasinghe_2026_CVPR,
  author    = {Ranasinghe, Yasiru and Schenck, Elim and Yellin, Florence and Hu, Shuowen and Funk, Christopher and Patel, Vishal M.},
  title     = {{Thermal-Det}: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34628--34637}
}
DLWM: Dual Latent World Models enable Holistic Gaussian-centric Pre-training in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yiyao and Xue, Ying and Zhang, Haiming and Jiang, Guangfeng and Zhou, Wending and Yan, Xu and Gao, Jiantao and Cai, Yingjie and Liu, Bingbing and Li, Zhen and Shen, Shaojie},
  title     = {{DLWM}: Dual Latent World Models enable Holistic {Gaussian}-centric Pre-training in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39713--39723}
}
Dual-Granularity Memory for Efficient Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hongjun and Liu, Lin and Li, Jianguo and Lin, Tao},
  title     = {Dual-Granularity Memory for Efficient Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38016--38026}
}
VinQA: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document QA-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Young Rok and Kong, Hyesoo and An, Kyunghwan and Huh, Jae Sub and KIM, Gyeonghun and Choi, Stanley Jungkyu},
  title     = {{VinQA}: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document {QA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41130--41139}
}
A Bit is All You Need! Efficient Video Capture via Single Bit Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Gandikota_2026_CVPR,
  author    = {Gandikota, Kanchana Vaishnavi and Moeller, Michael and Kolb, Andreas and Choubey, Bhaskar and Chandramouli, Paramanand},
  title     = {A Bit is All You Need! Efficient Video Capture via Single Bit Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34016--34027}
}
BuildingGPT: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Liu, Yujun and Huang, Shangfeng and Gao, Xiang and Wang, Ruisheng and Shen, Shuhan},
  title     = {{BuildingGPT}: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36400--36410}
}
Unlocking the Power of Critical Factors for 3D Visual Geometry Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangkai and Geng, Hua and Zheng, Huanyi and Yin, Songyi and Sun, Yanlong and Chen, Hao and Shen, Chunhua},
  title     = {Unlocking the Power of Critical Factors for {3D} Visual Geometry Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28979--28989}
}
No Need For Real Anomaly: MLLM Empowered Zero-Shot Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Zunkai and Li, Ke and Liu, Jiajia and Yang, Jie and Qiao, Yuanyuan},
  title     = {No Need For Real Anomaly: {MLLM} Empowered Zero-Shot Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35648--35658}
}
LottieGPT: Tokenizing Vector Animation for Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junhao and Gao, Kejun and Cui, Yuehan and Sun, Mingze and Chen, Mingjin and Wang, Shaohui and Long, Xiaoxiao and Ma, Fei and Tian, Qi and Zhao, Hao and Huang, Ruqi},
  title     = {{LottieGPT}: Tokenizing Vector Animation for Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31639--31651}
}
Matching Every Pair to Track Every Point: PairFormer for All-Pairs Tracking and Video Trajectory Fields-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guangyang and Ding, Youran and Che, Xinyu and Sun, Benyuan and Yang, Yi and Liu, Xiaohong},
  title     = {Matching Every Pair to Track Every Point: {PairFormer} for All-Pairs Tracking and Video Trajectory Fields},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35187--35196}
}
Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Minseok and Hamilton, Mark and Kim, Changick},
  title     = {Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29707--29716}
}
LESA: Learnable Stage-Aware Predictors for Diffusion Model Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Peiliang and Liu, Jiacheng and Xu, Haowen and Wang, Xinyu and Zou, Chang and Zhang, Linfeng},
  title     = {{LESA}: Learnable Stage-Aware Predictors for Diffusion Model Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43300--43309}
}
Residual Connections Harm Generative Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiao and Jiang, Ruoxi and Gao, William and Willet, Rebecca and Maire, Michael},
  title     = {Residual Connections Harm Generative Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39669--39679}
}
SANER: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yimin and Pu, Nan and Yang, Fengxiang and Li, Wenjing and Li, Zhihui and Zhong, Zhun},
  title     = {{SANER}: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40376--40385}
}
LangRef3DGS: Natural Language-Guided 3D Referential Segmentation from Partial Observations via 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Zhang, Qin and Zhou, Kun},
  title     = {{LangRef3DGS}: Natural Language-Guided {3D} Referential Segmentation from Partial Observations via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38595--38605}
}
Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihao and Wei, Yuxiang and Zhou, Xinpeng and Zhang, Tianyu and Liang, Tao and Bai, Yalong and Zhang, Hongzhi and Zuo, Wangmeng},
  title     = {Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29146--29156}
}
Computer Vision with a Superpixelation Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mahalingam_2026_CVPR,
  author    = {Mahalingam, Sasidharan and Brown, Rachel and Ingle, Atul},
  title     = {Computer Vision with a Superpixelation Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41773--41783}
}
Elastic3D: Controllable Stereo Video Conversion with Guided Latent Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Metzger_2026_CVPR,
  author    = {Metzger, Nando and Truong, Prune and Bhat, Goutam and Schindler, Konrad and Tombari, Federico},
  title     = {{Elastic3D}: Controllable Stereo Video Conversion with Guided Latent Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32693--32703}
}
DuetMerging: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Cao, Guiping and Song, Yaguang and Tao, Ming and Gong, Haoran and Liu, Junhui and Wang, Yaowei and Jiang, Dongmei},
  title     = {{DuetMerging}: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41954--41963}
}
Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with DPCache-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Bowen and Wang, Yuanbin and Xu, Huajiang and Chen, Biaolong and Zhang, Aixi and Jiang, Hao and Jin, Zhengzheng and Liu, Xu and Huang, Pipei},
  title     = {Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with {DPCache}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43632--43642}
}
GPFlow: Gaussian Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiting and Yang, Xulei and Liao, Jingyi and Zhang, Jing and Liu, Fayao},
  title     = {{GPFlow}: {Gaussian} Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43103--43112}
}
MVLM: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Dae-Hyeon and Baek, Mina and Ha, Jeong-Hun and Park, Chan-Seop and Ganiev, Jamshidjon and Bae, Seung-Hwan},
  title     = {{MVLM}: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35156--35165}
}
Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zeyuan and Yu, Xueyang and Chen, Delin and Shen, Maohao and Gan, Chuang},
  title     = {Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33510--33520}
}
Flow Map Distillation Without Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Shangyuan and Ma, Nanye and Xie, Saining and Jaakkola, Tommi},
  title     = {Flow Map Distillation Without Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33973--33984}
}
Unified Vector Floorplan Generation via Markup Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiohara_2026_CVPR,
  author    = {Shiohara, Kaede and Yamasaki, Toshihiko},
  title     = {Unified Vector Floorplan Generation via Markup Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39262--39271}
}
Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yilong and Tian, Jianxin and Zhang, Shengchuan and Cao, Liujuan},
  title     = {Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34745--34754}
}
ForeAct: Steering Your VLA with Efficient Visual Foresight Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhuoyang and Yang, Shang and Hu, Qinghao and Huang, Luke J. and Hou, James and Sun, Yufei and Lu, Yao and Han, Song},
  title     = {{ForeAct}: Steering Your {VLA} with Efficient Visual Foresight Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37195--37205}
}
RAPID: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion-
[pdf]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Shangran and Lu, Lu and Chen, Jian and Liu, Qiang},
  title     = {{RAPID}: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36147--36156}
}
Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kwanyoung and Cha, SeungJu and Ahn, Yebin and Oh, Hyunwoo and Koh, Sungho and Kim, Dong-Jin},
  title     = {Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43707--43716}
}
Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tianyi and Duan, Zheng-Peng and Guo, Chun-Le and Jiang, Peng-Tao and Li, Bo and Cheng, Ming-Ming and Li, Chongyi},
  title     = {Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30543--30552}
}
Medic-AD: Towards Medical Vision-Language Model's Clinical Intelligence-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Woohyeon and Kim, Jaeik and Cho, Sunghwan Steve and Hong, Pa and Jeong, Wookyoung and Nam, Yoojin and Kim, Namjoon and Wong, Ginny Y. and Cheung, Ka Chun and Do, Jaeyoung},
  title     = {{Medic-AD}: Towards Medical Vision-Language Model's Clinical Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36321--36331}
}
RAM: Recover Any 3D Human Motion in-the-Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Sen and Zhu, Ning and Zhong, Jinqin and Zhou, Jiale and Zhang, Huaping and Hwang, Jenq-Neng and Li, Lei},
  title     = {{RAM}: Recover Any {3D} Human Motion in-the-Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42789--42799}
}
UniFusion: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xingyuan and Du, Songcheng and Zou, Yang and Xu, Haoyuan and Jiang, Zhiying and Liu, Jinyuan},
  title     = {{UniFusion}: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33869--33880}
}
Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Wenhao and Li, Ji and Liu, Zhaoqiang},
  title     = {{Just-in-Time}: Training-Free Spatial Acceleration for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40643--40652}
}
Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR,
  author    = {Han, Minghao and Liu, Yichen and Liu, Yizhou and Chen, Zizhi and Tang, Jingqun and Wu, Xuecheng and Yang, Dingkang and Zhang, Lihua},
  title     = {Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42744--42754}
}
GeoMMBench and GeoMMAgent: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Aoran and Cheng, Shihao and Xu, Yonghao and Ren, Yexian and Chen, Hongruixuan and Yokoya, Naoto},
  title     = {{GeoMMBench} and {GeoMMAgent}: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34843--34853}
}
DBMSolver: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Venugopal_2026_CVPR,
  author    = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun},
  title     = {{DBMSolver}: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36062--36071}
}
Mixture of Style Experts for Diverse Image Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Shihao and Ouyang, Ziheng and Kang, Yijia and Wang, Qilong and Zhou, Mi and Li, Bo and Cheng, Ming-Ming and Hou, Qibin}, title = {Mixture of Style Experts for Diverse Image Stylization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30500-30510} }
Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brateanu_2026_CVPR, author = {Brateanu, Alexandru and Mu, Tingting and Ancuti, Codruta O. and Ancuti, Cosmin}, title = {Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29887-29896} }
BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Kanglei and Li, Chang and Pan, Qingyi and Wang, Liyuan}, title = {BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38904-38914} }
Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Feng and Yi, Wenhui and Zhou, Yunpeng and He, Xinan and Rao, Hong and Hu, Shu}, title = {Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32704-32713} }
BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shinoda_2026_CVPR, author = {Shinoda, Risa and Shiohara, Kaede and Inoue, Nakamasa and Saito, Kuniaki and Santo, Hiroaki and Okura, Fumio}, title = {BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29336-29346} }
Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vu_2026_CVPR, author = {Vu, Duc and Nguyen, Anh and Tran, Chi and Tran, Anh}, title = {Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37621-37631} }
HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Thinh and Phan, Trung and Nguyen, Binh and Doan, Khoa D and Wong, Kok-Seng}, title = {HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39435-39444} }
Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tao and Zhang, Kun and Wu, Qiong and Chen, Xiao and Chang, Chao and Sun, Xiaoshuai and Zhou, Yiyi and Ji, Rongrong}, title = {Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31877-31888} }
Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Gole_2026_CVPR, author = {Gole, Siddhant and Pal, Akash and More, Amit and Bhat, S Divakar and Chaudhuri, Subhasis and Banerjee, Biplab}, title = {Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34694-34703} }
Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shibata_2026_CVPR, author = {Shibata, Yuto and Yamazaki, Kashu and Jayanti, Lalit and Aoki, Yoshimitsu and Isogawa, Mariko and Fragkiadaki, Katerina}, title = {Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38345-38354} }
Photo-Guided Tooth Segmentation on 3D Oral Scan Model-
[pdf]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Shaojie and Wei, Guangshun and He, Jiangxin and Zhou, Yuanfeng}, title = {Photo-Guided Tooth Segmentation on 3D Oral Scan Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37558-37567} }
Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Sixian and Wang, Yiyao and Song, Xinhang and Zhang, Keming and Xu, Zijian and Jiang, Shuqiang}, title = {Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37086-37097} }
Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xuesong and Xu, Anke and Cao, Wenbo and Ientilucci, Emmett}, title = {Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34775-34783} }
Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Dongsheng and Guo, Xinyuan and Zhang, Huijie and Hao, Pingting and Xia, Qiushi}, title = {Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31103-31112} }
SpaceTools: Tool-Augmented Spatial Reasoning via Double Interactive RL-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Siyi and Uy, Mikaela Angelina and Song, Chan Hee and Ladhak, Faisal and Murali, Adithyavairavan and Qu, Qing and Birchfield, Stan and Blukis, Valts and Tremblay, Jonathan}, title = {SpaceTools: Tool-Augmented Spatial Reasoning via Double Interactive RL}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37109-37120} }
VLM-Pruner: Buffering for Spatial Sparsity in an Efficient VLM Centrifugal Token Pruning Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhenkai and Ma, Xiaowen and Ni, Zhenliang and Zhang, Dengming and Shu, Han and Jiang, Xin and Chen, Xinghao}, title = {VLM-Pruner: Buffering for Spatial Sparsity in an Efficient VLM Centrifugal Token Pruning Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31952-31961} }
Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Zhi-Hao and Huang, Longfei and Yang, Yang}, title = {Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37896-37905} }
Understanding and Enforcing Weight Disentanglement in Task Arithmetic-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shangge and Yin, Yuehan and Wang, Lei and Fan, Qi and Shi, Yinghuan and Li, Wenbin and Gao, Yang and Tao, Dacheng}, title = {Understanding and Enforcing Weight Disentanglement in Task Arithmetic}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28744-28753} }
HAD: Hallucination-Aware Diffusion Priors for 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xi and Sun, Weiwei and Ren, Zhou and Broaddus, Chris and Huang, Siyu and Guigues, Laurent}, title = {HAD: Hallucination-Aware Diffusion Priors for 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29781-29791} }
Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Yifan and Cao, Yuxin and Zhang, Yedi and He, Wentao and Xiao, Yan and Du, Xianglong and Huang, Zhiyong and Dong, Jin Song}, title = {Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34950-34960} }
Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding-
[pdf]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Wenqi and Li, Ruifan and Lin, Pengyue and Feng, Fangxiang and Ma, Zhanyu and Wang, Xiaojie}, title = {Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31822-31832} }
Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2026_CVPR, author = {Bae, Seohui and Kim, Jeonghye and Sung, Youngchul and Lim, Woohyung}, title = {Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29642-29651} }
VKG-QA: Visual Knowledge Graph-based Question Answer for Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Yuntao and Wang, Yiming and Yuan, Renshuo and Yue, Jincheng and Chen, Yijing and Fan, Yue and Zhang, Bo and Li, Qian and Cui, Lizhen}, title = {VKG-QA: Visual Knowledge Graph-based Question Answer for Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41213-41223} }
AdaCluster: Adaptive Query-Key Clustering for Sparse Attention in Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Haoyue and Wang, Shengnan and Qiao, Yulin and Zhang, Juncheng and Bai, Youhui and Gong, Ping and Jin, Zewen and Li, Cheng}, title = {AdaCluster: Adaptive Query-Key Clustering for Sparse Attention in Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43249-43259} }
LOREAL: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Mao, Rui and Wang, Yang}, title = {LOREAL: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39152-39163} }
Cov2Pose: Leveraging Spatial Covariance for Direct Manifold-aware 6-DoF Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ousalah_2026_CVPR, author = {Ousalah, Nassim Ali and Rostami, Peyman and Gaudilli{\`e}re, Vincent and Koumandakis, Emmanuel and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila}, title = {Cov2Pose: Leveraging Spatial Covariance for Direct Manifold-aware 6-DoF Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40727-40738} }
DriveLaW: Unifying Planning and Video Generation in a Latent Driving World-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Tianze and Li, Yongkang and Zhou, Lijun and Yao, Jingfeng and Xiong, Kaixin and Sun, Haiyang and Wang, Bing and Ma, Kun and Chen, Guang and Ye, Hangjun and Liu, Wenyu and Wang, Xinggang}, title = {DriveLaW: Unifying Planning and Video Generation in a Latent Driving World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39701-39712} }
OrthoFuse: Training-free Riemannian Fusion of Orthogonal Style-Concept Adapters for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aliev_2026_CVPR, author = {Aliev, Ali and Garifullin, Kamil and Yudin, Nikolay and Soboleva, Vera and Molozhavenko, Alexander and Oseledets, Ivan and Alanov, Aibek and Rakhuba, Maxim}, title = {OrthoFuse: Training-free Riemannian Fusion of Orthogonal Style-Concept Adapters for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36009-36018} }
Generative Neural Video Compression via Video Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Qi and Cheng, Hao and Yang, Tinghan and Jin, Libiao and Ma, Siwei}, title = {Generative Neural Video Compression via Video Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43239-43248} }
Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Ji and Suo, Wei and Wang, Peng and Zhang, Yanning}, title = {Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40224-40234} }
BlackMirror: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Feiran and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Zhao, Xilin and Cao, Xiaochun and Huang, Qingming}, title = {BlackMirror: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30098-30109} }
Taming Video Models for 3D and 4D Generation via Zero-Shot Camera Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Chenxi and Yang, Yanming and Zhao, Tong and Li, Ruibo and Zhang, Chi}, title = {Taming Video Models for 3D and 4D Generation via Zero-Shot Camera Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40352-40363} }
Learning Eigenstructures of Unstructured Data Manifolds-
[pdf]
[supp]
[bibtex]@InProceedings{Velich_2026_CVPR, author = {Velich, Roy and Piven, Arkadi and Bensa{\"\i}d, David and Cremers, Daniel and Dag{\`e}s, Thomas and Kimmel, Ron}, title = {Learning Eigenstructures of Unstructured Data Manifolds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36201-36214} }
Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Kaihua and Qi, Jiaxin and Ou, Jinli and Zheng, Yuhua and Huang, Jianqiang}, title = {Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39361-39371} }
Spectral Mixture-of-Experts for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Chen and Dong, Xingbo and Shen, Xuelin and Jin, Zhe}, title = {Spectral Mixture-of-Experts for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39972-39982} }
Learning Convex Decomposition via Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuezhi and Huang, Qixing and Uy, Mikaela Angelina and Sharp, Nicholas}, title = {Learning Convex Decomposition via Feature Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36190-36200} }
M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Junqiao and Zhou, Yunjiao and Yang, Yizhuo and Cui, Xinyuan and Zhang, Jiarui and Xie, Lihua and Yang, Jianfei and Lu, Chris Xiaoxuan and Ding, Fangqiang}, title = {M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42836-42846} }
ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Junsik and Bang, Gun and Kim, Soowoong}, title = {ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39011-39020} }
PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Shang_2026_CVPR, author = {Shang, Junpeng and Shao, Feifei and Xiao, Jun and Li, Lin and Wang, Hongwei and Ma, Dongfang}, title = {PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38657-38667} }
Learning Differentiable Hierarchies in 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Youqi and Zhou, Wugen and Zha, Hongbin}, title = {Learning Differentiable Hierarchies in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40939-40948} }
Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zaleska_2026_CVPR, author = {Zaleska, Katarzyna and Popek, {\L}ukasz and Wysocza{\'n}ska, Monika and Deja, Kamil}, title = {Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30854-30863} }
Lenses: Toward Polysemous Vision-Language Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Alomari_2026_CVPR, author = {Alomari, Hani and Asgarov, Ali and Thomas, Chris}, title = {Lenses: Toward Polysemous Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37810-37820} }
Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xiangyu and Jiang, Haoyi and Liu, Liu and Nam, Seungtae and Kang, Gyeongjin and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang and Park, Eunbyung}, title = {Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33280-33290} }
Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zejian and Ma, Jiarui and Xu, Han and Zheng, Weiting and Zhu, Yangrui and Meng, Chenye and Chen, Pei and Yang, Ling and Yang, Zhiyuan and Yang, Changyuan and Yang, Guang and Koh, Immanuel and Sun, Lingyun}, title = {Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32592-32601} }
Align Images Before You Generate-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shihua and Shen, Qiuhong and Wang, Xinchao}, title = {Align Images Before You Generate}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30521-30531} }
FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Jing and Mu, Lingzhou and Jiang, Fan and Ma, Chengcheng and Xu, Mu and Qi, Yonggang}, title = {FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29674-29683} }
Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tianxiao and Huang, Zhenglin and Wen, Haiquan and He, Yiwei and Li, Xinze and Zhu, Bingyu and Duan, Wuhui and Chen, Congang and Fu, Zeyu and Dong, Yi and Wu, Baoyuan and Li, Xiangtai and Cheng, Guangliang}, title = {Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30299-30311} }
ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Hao and Wang, Jiahao and Zhang, Yaolun and Wang, Ruohui and Zheng, Xuanyu and Tang, Yepeng and Lin, Dahua and Lu, Lewei}, title = {ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32572-32581} }
AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zijian and Huang, Qiusheng and Guo, Anboyu and Zhong, Xiaohui and Li, Hao}, title = {AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33143-33152} }
Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, ZeBin and Hu, Yang and Bi, Xiuli and Liu, Bo and Xiao, Bin}, title = {Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31316-31325} }
SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Wentao and Kong, Fanzhen and Kang, Zejian and Huang, Xiangru}, title = {SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41012-41021} }
Guiding Token-Sparse Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krause_2026_CVPR, author = {Krause, Felix and Baumann, Stefan Andreas and Schusterbauer, Johannes and Grebenkova, Olga and Gui, Ming and Hu, Vincent Tao and Ommer, Bj{\"o}rn}, title = {Guiding Token-Sparse Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35789-35799} }
ScenDi: 3D-to-2D Scene Diffusion Cascades for Urban Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Hanlei and Shao, Jiahao and Chen, Xinya and Tan, Xiyang and Miao, Sheng and Shen, Yujun and Liao, Yiyi}, title = {ScenDi: 3D-to-2D Scene Diffusion Cascades for Urban Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40291-40302} }
Grounded Chain-of-Thought for Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Qiong and Yang, Xiangcong and Zhou, Yiyi and Fang, Chenxin and Song, Baiyang and Sun, Xiaoshuai and Ji, Rongrong}, title = {Grounded Chain-of-Thought for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33577-33587} }
EventHub: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bartolomei_2026_CVPR, author = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano and Gallego, Guillermo}, title = {EventHub: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37063-37074} }
Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yaozong and Liang, Qihua and Zhong, Bineng and Zeng, Shuimu and Xue, Yuanliang and Li, Ning and Song, Shuxiang}, title = {Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35197-35206} }
UniGame: Turning a Unified Multimodal Model Into Its Own Adversary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Zhaolong and Lu, Wang and Chen, Hao and Li, Sharon and Wang, Jindong}, title = {UniGame: Turning a Unified Multimodal Model Into Its Own Adversary}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37632-37641} }
GS^2: Graph-based Spatial Distribution Optimization for Compact 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xianben and Wang, Tao and Li, Yuxuan and Jin, Yi and Ling, Haibin}, title = {GS{\textasciicircum}2: Graph-based Spatial Distribution Optimization for Compact 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33259-33268} }
Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Yapeng and Yang, Lin and Chen, Yuguo and Chen, Xiangru and Wang, Taoyi and Wang, Lijian and Yang, Zheyu and Lin, Yihan and Zhao, Rong}, title = {Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37496-37506} }
Motus: A Unified Latent Action World Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Hongzhe and Tan, Hengkai and Xie, Shenghao and Wang, Zeyuan and Huang, Shuhe and Liu, Haitian and Zhao, Ruowen and Feng, Yao and Xiang, Chendong and Rong, Yinze and Zhao, Hongyan and Liu, Hanyu and Su, Zhizhong and Ma, Lei and Su, Hang and Zhu, Jun}, title = {Motus: A Unified Latent Action World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35101-35113} }
Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhihao and Dong, Shengwei and Yi, Chuang and Gao, Junxuan and Lai, Zhilu and Liu, Zhiqiang and Wang, Wei and Zhang, Guangtao}, title = {Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30574-30583} }
GenColorBench: A Color Evaluation Benchmark for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Butt_2026_CVPR, author = {Butt, Muhammad Atif and Gomez-Villa, Alexandra and Wu, Tao and Vazquez-Corral, Javier and Van De Weijer, Joost and Wang, Kai}, title = {GenColorBench: A Color Evaluation Benchmark for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36638-36648} }
FedRAC: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zihui and Fu, Yuhang and Du, Mengmeng and Yuan, Zhimin and Liu, Yachen and Liao, Weisheng and Wang, Kaiyu and Wang, Zheng}, title = {FedRAC: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31802-31811} }
SpeeDiff: Scalable Pixel-Anchored End-to-End Latent Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bingliang and Chu, Wenda and Li, Yizhuo and Yang, Linjie and Yue, Yisong and Bouman, Katherine and Song, Yang and Guo, Qiushan}, title = {SpeeDiff: Scalable Pixel-Anchored End-to-End Latent Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35893-35903} }
PoseMaster: A Unified 3D Native Framework for Stylized Pose Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Hongyu and Luo, Kunming and Li, Weiyu and Zhang, Kaiyi and Liang, Yixun and Huang, Jingwei and Guo, Chunchao and Tan, Ping}, title = {{PoseMaster}: A Unified {3D} Native Framework for Stylized Pose Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34292--34302} }
Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Yu, Weijiang and Ma, Jilin and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong}, title = {Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33794--33804} }
CineBrain: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jianxiong and Liu, Yichang and Yang, Baofeng and Feng, Jianfeng and Fu, Yanwei}, title = {{CineBrain}: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36224--36234} }
Latent Implicit Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kelvin and Shang, Chuyi and Karlinsky, Leonid and Feris, Rogerio and Darrell, Trevor and Herzig, Roei}, title = {Latent Implicit Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33457--33466} }
Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Chengyin and Wang, Xin and Qiu, Rui and Jia, Zhe and Zhao, Yingying and Wang, Kai and Kang, Xu and Wei, Yiwei}, title = {Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34981--34990} }
Finding Distributed Object-Centric Properties in Self-Supervised Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rawlekar_2026_CVPR, author = {Rawlekar, Samyak and Swain, Amitabh and Cai, Yujun and Wang, Yiwei and Yang, Ming-Hsuan and Ahuja, Narendra}, title = {Finding Distributed Object-Centric Properties in Self-Supervised Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31326--31335} }
KLIP: Localized Distribution Shift Detection via KL-Divergence with Diffusion Priors in Inverse Problems-
[pdf]
[supp]
[bibtex]@InProceedings{Kheirandish_2026_CVPR, author = {Kheirandish, Alireza and Hong, Jihoon and Fridovich-Keil, Sara}, title = {{KLIP}: Localized Distribution Shift Detection via {KL}-Divergence with Diffusion Priors in Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30823--30832} }
Scaling Up AI-Generated Image Detection with Generator-Aware Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Ziheng and Ji, Yuheng and Tao, Renshuai and Tian, Yuxuan and Liu, Yuyang and Wang, Yipu and Zheng, Xiaolong}, title = {Scaling Up {AI}-Generated Image Detection with Generator-Aware Prototypes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43008--43017} }
ViKey: Enhancing Temporal Understanding in Videos via Visual Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yeonkyung and Ju, Dayun and Kim, Youngmin and Kang, Seil and Hwang, Seong Jae}, title = {{ViKey}: Enhancing Temporal Understanding in Videos via Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38880--38890} }
Extending Embodied Question Answering from Perception to Decision-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Xicheng and Li, Qiwei and Xu, Peiran and Mu, Yadong}, title = {Extending Embodied Question Answering from Perception to Decision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29567--29577} }
Gaze Target Estimation Anywhere with Concepts-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Xu and Yang, Houze and Gunda, Vipin and Zhou, Zhongyi and Xu, Tianyu and Kowdle, Adarsh and Kim, Inki and Rehg, James M.}, title = {Gaze Target Estimation Anywhere with Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31304--31315} }
Tell Model Where to Look: Mitigating Hallucinations in MLLMs by Vision-Guided Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianfei and Zhang, Feng and Sun, Xin and Feng, Chong and Tan, Zhixing}, title = {Tell Model Where to Look: Mitigating Hallucinations in {MLLMs} by Vision-Guided Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32582--32591} }
Pico-Banana-400K: A Large-Scale Dataset for Text-Guided Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Yusu and Bocek-Rivele, Eli and Song, Liangchen and Tong, Jialing and Yang, Yinfei and Lu, Jiasen and Hu, Wenze and Gan, Zhe}, title = {{Pico-Banana-400K}: A Large-Scale Dataset for Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37226--37235} }
D2Cache: Second-Order Delta Caching for Higher Video Diffusion Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Enhuai and Wang, Yunke and Sun, Changming and Xu, Chang}, title = {{D2Cache}: Second-Order Delta Caching for Higher Video Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43589--43599} }
Deformation-based In-Context Learning for Point Cloud Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Chengxing and Deng, Jinhong and Lei, Yinjie and Li, Wen}, title = {Deformation-based In-Context Learning for Point Cloud Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39001--39010} }
Differentiable Laplacian Matrix Guided Superpixel Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Juybari_2026_CVPR, author = {Juybari, Jeremy and Hamilton, Josh and Das, Shuvra and Chen, Chaofan and Khalil, Andre and Zhu, Yifeng}, title = {Differentiable {Laplacian} Matrix Guided Superpixel Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36168--36178} }
Content-Aware Dynamic Patchification for Efficient Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Sheng and Barnes, Connelly and Rizve, Mamshad Nayeem and Peng, Hongwu and Li, Zhengang and Dibua, Ohi and Ganjdanesh, Alireza and Tang, Xulong and Kang, Yan and Gong, Yifan}, title = {Content-Aware Dynamic Patchification for Efficient Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35936--35945} }
Object-WIPER: Training-Free Object and Associated Effect Removal in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kushwaha_2026_CVPR, author = {Kushwaha, Saksham Singh and Nag, Sayan and Tian, Yapeng and Kulkarni, Kuldeep}, title = {{Object-WIPER}: Training-Free Object and Associated Effect Removal in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38071--38080} }
EarlyTom: Early Token Compression Completes Fast Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hesong and Jin, Xin and Lu, Lu and Li, Chenhaowen and Chen, Jian and Liu, Qiang and Wang, Huan}, title = {{EarlyTom}: Early Token Compression Completes Fast Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40559--40568} }
ReaGEN: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Ruiqing and Singamsetti, Mohan Sai and Niu, Di and Rashidi, Bahador}, title = {{ReaGEN}: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33521--33530} }
High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Feng and Zhang, Kai and Zhang, Li and Jia, Chuanmin}, title = {High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33652--33661} }
WorldStereo: Bridging Camera-Guided Video Generation and Scene Reconstruction via 3D Geometric Memories-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yisu and Cao, Chenjie and Wang, Tengfei and Zuo, Xuhui and Wu, Junta and Zhu, Jianke and Guo, Chunchao}, title = {{WorldStereo}: Bridging Camera-Guided Video Generation and Scene Reconstruction via {3D} Geometric Memories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40327--40339} }
SALMUBench: A Benchmark for Sensitive Association-Level Multimodal Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Selvas-Sala_2026_CVPR, author = {Selvas-Sala, Cai and Kang, Lei and Gomez, Lluis}, title = {{SALMUBench}: A Benchmark for Sensitive Association-Level Multimodal Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39351--39360} }
MusicInfuser: Making Video Diffusion Listen and Dance-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Susung and Kemelmacher-Shlizerman, Ira and Curless, Brian and Seitz, Steven M.}, title = {{MusicInfuser}: Making Video Diffusion Listen and Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43751--43761} }
Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2026_CVPR, author = {Mo, Sicheng and Nguyen, Thao and Zhang, Richard and Kolkin, Nick and Iyer, Siddharth Srinivasan and Shechtman, Eli and Singh, Krishna Kumar and Lee, Yong Jae and Zhou, Bolei and Li, Yuheng}, title = {Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35702--35712} }
MoRe: Motion-aware Feed-forward 4D Reconstruction Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Juntong and Chen, Zequn and Zhang, Weiqi and Di, Donglin and Zhang, Xuancheng and Yang, Chengmin and Liu, Yu-Shen}, title = {{MoRe}: Motion-aware Feed-forward {4D} Reconstruction Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28914--28924} }
Simple-ViLMedSAM: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Chengcan and Nie, Dong and Chen, Geng and Zhang, Daoqiang and Wen, Xuyun}, title = {{Simple-ViLMedSAM}: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30042--30052} }
TIPSv2: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Bingyi and Chen, Koert and Maninis, Kevis-Kokitsi and Chen, Kaifeng and Karpur, Arjun and Xia, Ye and Dua, Sahil and Dabral, Tanmaya and Han, Guangxing and Han, Bohyung and Ainslie, Joshua and Bewley, Alex and Jacob, Mithun and Wagner, Ren{\'e} and Ramos, Washington and Choromanski, Krzysztof and Seyedhosseini, Mojtaba and Zhou, Howard and Araujo, Andre}, title = {{TIPSv2}: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29325--29335} }
The LLM Bottleneck: Why Open-Source Vision LLMs Struggle with Hierarchical Visual Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Yuwen and Qing, Yuan and Gong, Boqing}, title = {The {LLM} Bottleneck: Why Open-Source Vision {LLMs} Struggle with Hierarchical Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38532--38543} }
Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Taehoon and Gouk, Henry and Hospedales, Timothy}, title = {Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43611--43620} }
SPEAR-1: Scaling Beyond Robot Demonstrations via 3D Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nikolov_2026_CVPR, author = {Nikolov, Nikolay and Albanese, Giuliano and Dey, Sombit and Yanev, Aleksandar and Van Gool, Luc and Zaech, Jan-Nico and Paudel, Danda Pani}, title = {{SPEAR-1}: Scaling Beyond Robot Demonstrations via {3D} Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35124--35134} }
When Token Pruning is Worse than Random: Understanding Visual Token Information in VLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yahong and Wu, Juncheng and Ni, Zhangkai and Yang, Longzhen and Liu, Yihang and Yang, Chengmei and Wen, Ying and He, Lianghua and Tang, Xianfeng and Liu, Hui and Zhou, Yuyin}, title = {When Token Pruning is Worse than Random: Understanding Visual Token Information in {VLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31910--31919} }
Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jingyi and Zheng, Meisong and Chen, Ying and Qiao, Minglang and Deng, Xin and Xu, Mai}, title = {Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38248--38257} }
TANGO: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tengfei and Pan, Weiran and Wei, Wei}, title = {{TANGO}: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39186--39196} }
Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zhengyao and Jia, Zexi and Zhong, Yijia and Luo, Pengcheng and Zhang, Jinchao and Lu, Guangming and Yu, Jun and Pei, Wenjie}, title = {Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37258--37267} }
EgoRoC: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Wei and Zhang, Chi and Li, Nan and Zhang, Qian and Zhang, Qi and Li, Mingyan}, title = {{EgoRoC}: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34991--35001} }
Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Qianqian and Zhu, Jinchi and Zhou, Xiaolu and Xu, Yongchao}, title = {Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34061--34071} }
In Pursuit of Pixel Supervision for Visual Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Lihe and Li, Shang-Wen and Li, Yang and Lei, Xinjie and Wang, Dong and Mohamed, Abdelrahman and Xie, Saining and Zhao, Hengshuang and He, Kaiming and Xu, Hu}, title = {In Pursuit of Pixel Supervision for Visual Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31974--31984} }
Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hakyeong and Wang, Ruicheng and Yao, Chengtang and Yang, Jiaolong and Kim, Min H.}, title = {Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36518--36528} }
Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Zhenwu and Gong, Jingyu and Wang, Peiwei and Wang, Xingzan and Qian, Tianwen and Li, Wenxi and Fang, Yuan and Xie, Jiao and Ma, Lizhuang and Lin, Shaohui}, title = {Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30595--30606} }
FabricGen: Microstructure-Aware Woven Fabric Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yingjie and Luo, Di and Wang, Zixiong and Ling, Xiaoli and Yang, Jian and Wang, Beibei}, title = {{FabricGen}: Microstructure-Aware Woven Fabric Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34333--34342} }
FedRE: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Yuan and Wang, Lixu and Wu, Jiaqi and Song, Jin and Chen, Simin and Wang, Zehua and Tian, Zijian and Chen, Wei and Li, Huixia and Li, Xiaoxiao}, title = {{FedRE}: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39466--39475} }
Scale Space Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mukhopadhyay_2026_CVPR, author = {Mukhopadhyay, Soumik and Udhayanan, Prateksha and Shrivastava, Abhinav}, title = {Scale Space Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35851--35860} }
VisRes Bench: On Evaluating the Visual Reasoning Capabilities of VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Tortei_2026_CVPR, author = {T{\"o}rtei, Brigitta Malagurski and Dahou, Yasser and Huynh, Ngoc Dung and Para, Wamiq Reyaz and Khac, Ph{\'u}c H. L{\^e} and Singh, Ankit and Chaybouti, Sofian and Narayan, Sanath}, title = {{VisRes Bench}: On Evaluating the Visual Reasoning Capabilities of {VLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33185--33195} }
Learned Image Compression via Sparse Attention and Adaptive Frequency-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Huidong and Shi, Xinyan and Sun, Hui and Yue, Xiaofei and Liu, Xiaoguang and Wang, Gang and Cai, Wentong}, title = {Learned Image Compression via Sparse Attention and Adaptive Frequency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41278--41287} }
FedAdamom: Adaptive Momentum for Improved Generalization in Federated Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Wenjie and Chen, Tianxiang and Wang, Feng and Wu, Tiantong and Zheng, Zhiming and Tang, Shaoting and Lim, Wei Yang Bryan}, title = {{FedAdamom}: Adaptive Momentum for Improved Generalization in Federated Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36354--36364} }
SEATrack: Simple, Efficient, and Adaptive Multimodal Tracker-
[pdf]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Junbin and Xue, Ziteng and Zhang, Shihui and Chen, Kun and Hu, Weiming and Zhang, Zhipeng}, title = {{SEATrack}: Simple, Efficient, and Adaptive Multimodal Tracker}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28679--28689} }
Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Kai and Chen, Li and Cheng, Jun}, title = {Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36300--36310} }
Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Linhua and Sima, Bingrui and He, Kun}, title = {Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36926--36935} }
BEV-CAR: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Yixin and Wang, Ke and Cheng, Tongtong and Liu, Chunhui and Liu, Kai}, title = {{BEV-CAR}: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39869--39878} }
BiomedCCPL: Causal Conditional Prompt Learning for Biomedical Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Xueliang and Zhang, Juncai and Hou, Jiacheng and Lu, Dan and Zhang, Hao and Wang, Ruxin}, title = {{BiomedCCPL}: Causal Conditional Prompt Learning for Biomedical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40812--40821} }
RaUF: Learning the Spatial Uncertainty Field of Radar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shengpeng and Wang, Kuangyu and Wang, Wei}, title = {{RaUF}: Learning the Spatial Uncertainty Field of Radar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42474--42483} }
Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Yifei and Li, Chenglong and Zhang, Yuyang and Hu, Guyue and Tang, Jin}, title = {Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38732--38741} }
SkillSight: Efficient First-Person Skill Assessment with Gaze-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chi Hsuan and Kumar, Ashutosh and Grauman, Kristen}, title = {{SkillSight}: Efficient First-Person Skill Assessment with Gaze}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38891--38903} }
Molmo2: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Clark_2026_CVPR, author = {Clark, Christopher and Zhang, Jieyu and Ma, Zixian and Park, Jae Sung and Tripathi, Rohun and Lee, Sangho and Salehi, Mohammadreza and Ren, Jason and Kim, Chris Dongjoo and Yang, Yinuo and Shao, Vincent and Yang, Yue and Huang, Weikai and Gao, Ziqi and Anderson, Taira and Zhang, Jianrui and Jain, Jitesh and Stoica, George and Farhadi, Ali and Krishna, Ranjay}, title = {{Molmo2}: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28652--28668} }
MonoVLM: Monocular 3D Visual Grounding with Vision Language Models-
[pdf]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Huaizhi and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Chen, Tianlong}, title = {{MonoVLM}: Monocular {3D} Visual Grounding with Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30986--30996} }
Single-Round Scalable Analytic Federated Learning-
[pdf]
[bibtex]@InProceedings{Bacellar_2026_CVPR, author = {Bacellar, Alan T. L. and Munir, Mustafa and Fran{\c{c}}a, Felipe M. G. and Lima, Priscila M. V. and Marculescu, Radu and John, Lizy K.}, title = {Single-Round Scalable Analytic Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39445--39454} }
HierEdit: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuyao and Huang-Menders, Alexander and Tai, Yu-Wing}, title = {{HierEdit}: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43546--43557} }
BluRef: Unsupervised Image Deblurring with Dense-Matching References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Bang-Dang and Tran, Anh and Pham, Cuong and Hoai, Minh}, title = {{BluRef}: Unsupervised Image Deblurring with Dense-Matching References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37445--37454} }
Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Qingyan and Wang, Qiuyu and Ouyang, Hao and Yu, Yue and Wang, Hanlin and Wang, Wen and Cheng, Ka Leong and Ma, Shuailei and Zeng, Yanhong and Liu, Zichen and Xu, Yinghao and Shen, Yujun and Chen, Qifeng}, title = {Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37971--37981} }
Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Weijun and Huang, Yuqing and Meng, Weikang and Li, Xin and Liu, Ming and Hong, Xiaopeng and Wang, Yaowei and Zuo, Wangmeng}, title = {Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39197--39207} }
FG-Portrait: 3D Flow Guided Editable Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yating and Miao, Yunqi and Ververas, Evangelos and Deng, Jiankang and Song, Jifei}, title = {{FG-Portrait}: {3D} Flow Guided Editable Portrait Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32947--32956} }
Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Seung hee and Jeon, MinJu and Oh, Hyunwoo and Lee, Jihwan and Kim, Dong-Jin}, title = {Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32808--32817} }
Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled PET-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Huabin and Chen, Xinyu and Zhou, Yuan and Liu, Fei}, title = {Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled {PET}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32037--32046} }
Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yiheng and Liu, Sheng and Feng, Yuan and Jin, Zhelun and Jiang, Yining and Xu, Min}, title = {Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35915-35925} }
MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yuqi and Zhang, Hao and Shao, Wenqi and Liu, Shiqu and Gu, Zhihong and Wang, Wenxiao and He, Xiaofei and Zhang, Kaipeng}, title = {MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42126-42136} }
Decoupling Vision and Language: Codebook Anchored Visual Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jason and Zhao, Tianchen and Liu, Chang and Cai, Jiarui and Zhang, Zheng and Li, Zhuowei and Singh, Aaditya and Xu, Xiang and Srivastava, Mani and Wu, Jonathan}, title = {Decoupling Vision and Language: Codebook Anchored Visual Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36957-36967} }
Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zizhi and Gao, Yizhen and Han, Minghao and Liu, Yizhou and Chen, Zhaoyu and Yang, Dingkang and Zhang, Lihua}, title = {Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32309-32321} }
Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xinkuan and Kan, Meina and He, Zhenliang and Zhou, Yongbin and Shan, Shiguang}, title = {Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40845-40854} }
Seeing Conversations: Communication Context Identification in Egocentric Video-
[pdf]
[bibtex]@InProceedings{Dorszewski_2026_CVPR, author = {Dorszewski, Tobias and Hjortkj{\ae}r, Jens}, title = {Seeing Conversations: Communication Context Identification in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38816-38825} }
SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Weihong and Zhang, Xiaoyu and Zhang, Zhuang and Ye, Zhichao and Wang, Nan and Liu, Haomin and Zhang, Guofeng}, title = {SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33237-33247} }
Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yongchao and Liu, Jiawei and Wang, Junfeng and Tao, Sen and Jiang, Na and Zha, Zheng-Jun}, title = {Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34673-34682} }
FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Markov_2026_CVPR, author = {Markov, Mario and Ailuro, Stefan and Van Gool, Luc and Schindler, Konrad and Paudel, Danda Pani}, title = {FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34795-34805} }
Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Hanjing and Rao, Jiahua and Sun, Youhan and Xie, Jiancong and Yang, Yuedong}, title = {Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41804-41814} }
FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Xin and Li, Weilong and Liu, Wei and Huang, Wenke and Yu, Zhixi and Yang, Bin and Liao, Xiaoying and Jiang, Kui}, title = {FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40448-40457} }
ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Zichen and Hayder, Zeeshan and Liu, Wei and Wang, Hesheng and Mian, Ajmal Saeed}, title = {ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30718-30728} }
FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaokun and Yang, Yi and Ye, Ziqi and Baiyun, Baiyun and Guo, Xiaorong and Fang, Qingchen and Zhang, Ruyi and Zhou, Xinpeng and Wang, Haipeng}, title = {FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42158-42168} }
IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Ziye and Yang, Guang and Chen, Yihang and Luo, Changqing}, title = {IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39383-39392} }
CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Qinqian and Wang, Bo and Tan, Robby T.}, title = {CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38520-38531} }
Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Al_Nahian_2026_CVPR, author = {Al Nahian, Mohaiminul and Almalky, Abeer Matar and Ahmed, Sabbir and Al Arafat, Abdullah and Rizve, Mamshad Nayeem and Rakin, Adnan Siraj}, title = {Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34889-34899} }
Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Hao and Wang, Ye and Zhang, Wanpeng and Yuan, Haoqi and Feng, Yicheng and Xu, Haiweng and Zheng, Sipeng and Lu, Zongqing}, title = {Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35047-35058} }
SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaoyan and Bai, Zechen and Wang, Haofan and Song, Yiren}, title = {SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38165-38175} }
TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Wonhyeok and Hwang, Kyumin and Park, Jihun and Lee, Kyoungmin and Lee, Seunghun and Kim, Jaeyeul and Choi, Minwoo and Im, Sunghoon}, title = {TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36871-36880} }
From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Yuwen and Wang, Yuan and Li, Shaohui and Li, Zhi and Liu, Yu and He, You}, title = {From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35628-35637} }
SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation-
[pdf]
[bibtex]@InProceedings{Chong_2026_CVPR, author = {Chong, Qianpeng and Zeng, Wenyi and Shen, Xiuxuan and Li, Jiajie and Yin, Qian and Zheng, Xin}, title = {SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42104-42114} }
GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yin and Lu, Hao and Wang, Zixuan and Qin, Zhen and Kuang, Li and Zhou, Mengchu and Deng, Shuiguang}, title = {GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31985-31994} }
Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shikun and Li, Yong and Wang, Yiqun and Ke, Qiuhong and Chen, Cunjian}, title = {Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40130-40139} }
Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Tianming and Jiang, Haichao and Yang, Yuting and Tan, Chaolei and Li, Shuai and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39497-39507} }
Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Ziyao and Liu, Yingjie and Yangrui, Zhang and Chen, Mingsong and Tang, Xuan and Wei, Xian}, title = {Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30997-31007} }
Personalized Image Descriptions from Attention Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Ruoyu and Le, Hieu and Xu, Jingyi and Mondal, Sounak and Leite, Abe and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris}, title = {Personalized Image Descriptions from Attention Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40042-40052} }
MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Yiqian and Ye, Qinghong and Xu, Haoran and Li, Jianing and Ma, Dongyang and Wang, Xuan and Zhang, Wei and Tian, Yonghong and Peng, Peixi}, title = {MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37032-37042} }
Experience Transfer for Multimodal LLM Agents in Minecraft Game-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenghao and Liu, Jun and Zhang, Songbo and Jian, Huadong and Ni, Hao and Lee, Lik-Hang and Bae, Sung-Ho and Wang, Guoqing and Yang, Yang and Zhang, Chaoning}, title = {Experience Transfer for Multimodal LLM Agents in Minecraft Game}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37143-37153} }
HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Yongsung and Song, Wooseok and Lew, Jaihyun and Hwangbo, Hun and Lee, Jaehoon and Yoon, Sungroh}, title = {HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36509-36517} }
Towards Dynamic Modality Alignment in Multimodal Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Jiayao and Lyu, Fan and Liu, Tianle and Hu, Fuyuan and Feng, Wei}, title = {Towards Dynamic Modality Alignment in Multimodal Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39911-39921} }
SonoWorld: From One Image to a 3D Audio-Visual Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Derong and Chen, Xiyi and Lin, Ming C. and Gao, Ruohan}, title = {SonoWorld: From One Image to a 3D Audio-Visual Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30194-30204} }
AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Tianling and Gan, Shengzhe and Gu, Leslie and Li, Yuelei and Zhan, Fangneng and Pfister, Hanspeter}, title = {AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37133-37142} }
Detect Any AI-Counterfeited Text Image-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Chenfan and Zhong, Yiwu and Zhu, Xuekang and Li, Junchi and Jiang, Changjiang and Liu, Jian and Jin, Lianwen}, title = {Detect Any AI-Counterfeited Text Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35437-35450} }
ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Aoyu and Liu, Zhen and Wang, Ziyi and Chen, Dian and Zeng, Bing and Liu, Shuaicheng}, title = {ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29909-29918} }
DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shuhao and Liao, Wenjie and Vance, Hayden and Dong, Hang and Zhang, Rui and Guo, Chun-Le and Li, Chongyi}, title = {DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38176-38186} }
Taming Generative Diffusion Model for Task-Oriented Infrared Imaging-
[pdf]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tengyu and Dai, Zhilong and Diao, Yubo and An, Guanming and Ma, Long and Liu, Jinyuan and Liu, Risheng}, title = {Taming Generative Diffusion Model for Task-Oriented Infrared Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30843-30853} }
Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Zhirong and Huang, Rui and Liu, Jiacheng and Zou, Chang and Cai, Peiliang and Zheng, Shikang and Shi, Zhengyi and Feng, Liang and Zhang, Linfeng}, title = {Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30792-30801} }
Diversity over Uniformity: Rethinking Representation in Generated Image Detection-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Qinghui and Zhang, Haifeng and Qin, Qiao and Liu, Bo and Bi, Xiuli and Xiao, Bin}, title = {Diversity over Uniformity: Rethinking Representation in Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40407-40417} }
MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR, author = {Che, Lirong and Wen, Shuo and Huang, Shan and Wang, Chuang and Yang, Yuzhe and Dudek, Gregory and Wang, Xueqian and Su, Jian}, title = {MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37175-37185} }
See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Boyuan and Yin, Bo-Wen and Li, Yuan-Ming and Wei, Xihan and Hou, Qibin}, title = {See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36814-36827} }
VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xueqing and Li, Bohan and Li, Yan and Yang, Zhenheng}, title = {VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40581-40590} }
VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiqi and Zhang, Quande and Zhai, Ruifeng and Lin, Liang and Wang, Guangrun}, title = {VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35025-35035} }
ResCa: Residual Caching for Diffusion Transformers Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Haipeng and Li, Yu and Tang, Fan and Lu, Yixing and Cao, Juan and Tang, Sheng}, title = {ResCa: Residual Caching for Diffusion Transformers Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32957-32966} }
RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation-
[pdf]
[supp]
[bibtex]@InProceedings{Chai_2026_CVPR, author = {Chai, Jinming and Li, Lingling and Jiao, Licheng and Lu, Xiaoqiang and Sun, Long and Liu, Xu and Ma, Wenping and Li, Weibin}, title = {RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42213-42224} }
Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Wenliang and Barton, Rob and Goncalves, Lucas and Kumar, Kushal and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Bansal, Vidit and Bouyarmane, Karim and Huang, Junzhou}, title = {Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33749-33760} }
GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yonghao and Gao, Yupeng and Yang, Jian and Xie, Jin and Wang, Beibei}, title = {GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40896-40906} }
IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Lizhou and Xia, Songpengcheng and Lai, Zengyuan and Sun, Lan and Yang, Jiarui and Pei, Ling}, title = {IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42901-42910} }
DSO: Direct Steering Optimization for Bias Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paes_2026_CVPR, author = {Paes, Lucas Monteiro and Sivakumar, Nivedha and Wang, Yinong Oliver and Fedzechkina, Masha and Theobald, Barry-John and Zappella, Luca and Apostoloff, Nicholas}, title = {DSO: Direct Steering Optimization for Bias Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31714-31724} }
Graph Attention Prototypical Network for Robust Few-Shot Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tingyun and Liu, Licheng and Zhang, Qibin and Feng, Qiying and Chen, C. L. Philip}, title = {Graph Attention Prototypical Network for Robust Few-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33933-33942} }
Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Jinjia and Tan, Jican and Yu, Jiazuo and Tao, Zeze and Wang, Huibing}, title = {Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32278-32287} }
PnP-CM: Consistency Models as Plug-and-Play Priors for Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gulle_2026_CVPR, author = {Gulle, Merve and Yun, Junno and Alcalar, Yasar Utku and Akcakaya, Mehmet}, title = {PnP-CM: Consistency Models as Plug-and-Play Priors for Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38464-38474} }
Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Enzhuo and Zhao, Sijie and Muhtar, Dilxat and Li, Zhenshi and Zhang, Xueliang and Xiao, Pengfeng}, title = {Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38238-38247} }
Hist2Style: Histogram-Guided Stylization with Bilateral Grids-
[pdf]
[supp]
[bibtex]@InProceedings{Galor_2026_CVPR, author = {Galor, Dekel and Pikielny, Adam and Zhang, Zhoutong and Wang, Ke and Waller, Laura and Chen, Jiawen and Chugunov, Ilya}, title = {Hist2Style: Histogram-Guided Stylization with Bilateral Grids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29717-29726} }
Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Jincen and Zhou, Qianyu and Li, Yuhang and Su, Kui and Wang, Meili and Chang, Jian and Zhang, Jian Jun and Lu, Xuequan}, title = {Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39098-39110} }
MatchED: Crisp Edge Detection Using End-to-End, Matching-based Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cetinkaya_2026_CVPR, author = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre}, title = {MatchED: Crisp Edge Detection Using End-to-End, Matching-based Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42093-42103} }
Robust Promptable Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Sohyun and Gwon, Yeho and Hoyer, Lukas and Schindler, Konrad and Sakaridis, Christos and Kwak, Suha}, title = {Robust Promptable Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39838-39847} }
Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Novikov_2026_CVPR, author = {Novikov, David and Vaknin, Eilon and Tumanyan, Narek and Sheinin, Mark}, title = {Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41784-41793} }
SAG-GNN: Semantic-Aware Guided GNN for Descriptor-Free 2D-3D Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shihua and Xu, Tianhao and Li, Zizhuo and Ma, Qing and Ma, Jiayi}, title = {SAG-GNN: Semantic-Aware Guided GNN for Descriptor-Free 2D-3D Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31410-31420} }
Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Boyuan and Xi, Yingjie and He, Rui and Na, Jinhe and Cao, Ying and Wang, Pengjie and Zhang, Jian J. and Yang, Xiaosong}, title = {Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38376-38386} }
Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Yizhou and Feng, Yuezhu and Zhang, Jinjin and Wang, Peng and Liu, Qingjie and Wang, Yunhong}, title = {Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43061-43071} }
DEVA: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Das_2026_CVPR, author = {Das, Debasmit and Hayat, Munawar and Porikli, Fatih}, title = {DEVA: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39141-39151} }
MV-Fashion: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data-
[pdf]
[supp]
[bibtex]@InProceedings{Laczko_2026_CVPR, author = {Laczk\'o, Hunor and Jia, Libang and Truong, Loc-Phat and Hern\'andez, Diego and Escalera, Sergio and Gonzalez, Jordi and Madadi, Meysam}, title = {MV-Fashion: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42810-42823} }
UAV-CB: A Complex-Background RGB-T Dataset and Local Frequency Bridge Network for UAV Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Shenghui and Hu, Menghao and Zou, Longkun and Chi, Hongyu and Li, Zekai and Gao, Feng and Yang, Fan and Wu, Qingyao and Chen, Ke}, title = {UAV-CB: A Complex-Background RGB-T Dataset and Local Frequency Bridge Network for UAV Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40468-40478} }
Generalizable Sparse-View 3D Reconstruction from Unconstrained Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gupta_2026_CVPR, author = {Gupta, Vinayak and Lin, Chih-Hao and Wang, Shenlong and Bhattad, Anand and Huang, Jia-Bin}, title = {Generalizable Sparse-View 3D Reconstruction from Unconstrained Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33217-33226} }
MoDES: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yushi and Wang, Zining and Yuan, Zhihang and Ding, Yifu and Gong, Ruihao and Guo, Jinyang and Liu, Xianglong and Zhang, Jun}, title = {MoDES: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30205-30215} }
SearchAD: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Embacher_2026_CVPR, author = {Embacher, Felix and Uhrig, Jonas and Cordts, Marius and Enzweiler, Markus}, title = {SearchAD: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33088-33098} }
Space-Time Forecasting of Dynamic Scenes with Motion-aware Gaussian Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junmyeong and Choi, Hoseung and Cho, Minsu}, title = {Space-Time Forecasting of Dynamic Scenes with Motion-aware Gaussian Grouping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41033-41043} }
Nonlinear Color Transfer via Learnable Bezier Flows-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junhyoung and Jo, Seongwoon and Park, JeongHun and Ryou, Yeonji and Yang, Jeongha and Kim, Jangho}, title = {Nonlinear Color Transfer via Learnable Bezier Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41741-41751} }
DreamOmni2: Multimodal Instruction-based Generation and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Bin and Peng, Bohao and Zhang, Yuechen and Huang, Junjia and Liu, Jiyang and Li, Jingyao and Tan, Haoru and Wu, Sitong and Wang, Chengyao and Wang, Yitong and Yu, Bei and Jia, Jiaya}, title = {DreamOmni2: Multimodal Instruction-based Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29275-29284} }
WiseEdit: Benchmarking Cognition- and Creativity-Informed Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Kaihang and Chen, Weile and Qiu, Haiyi and Yu, Qifan and Bu, Wendong and Wang, Zehan and Zhu, Yun and Li, Juncheng and Tang, Siliang}, title = {WiseEdit: Benchmarking Cognition- and Creativity-Informed Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37268-37278} }
Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in dLLMs for Edit-Friendly Hard Prompt Inversion-
[pdf]
[supp]
[bibtex]@InProceedings{Devulapally_2026_CVPR, author = {Devulapally, Naresh Kumar and Agarwal, Shruti and Asnani, Vishal and Lokhande, Vishnu Suresh}, title = {Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in dLLMs for Edit-Friendly Hard Prompt Inversion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43290-43299} }
Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Haozhe and Li, Rui and Wang, Zhengbao and Zhu, Xinhao and Li, Linjie and Xiong, Tianyu and Ouyang, Xuan and Yang, Jiaqi}, title = {Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31389-31399} }
LifeEval: A Multimodal Benchmark for Assistive AI in Egocentric Daily Life Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Hengjian and Zhang, Kaiwei and Wang, Shibo and Chen, Mingjie and Cao, Qihang and Wang, Xianfeng and Zhu, Yucheng and Min, Xiongkuo and Sun, Wei and Zhu, Dandan and Zhai, Guangtao}, title = {LifeEval: A Multimodal Benchmark for Assistive AI in Egocentric Daily Life Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32892-32902} }
Decision Boundary-aware Generation for Long-tailed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiacheng and Zhang, Ruichi and Shang, Chikai and Li, Mengke and Shang, Xinyi and Gao, Junlong and Zhang, Yonggang and Lu, Yang}, title = {Decision Boundary-aware Generation for Long-tailed Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29441-29450} }
Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Peiyu and Akhtar, Naveed and Jiang, Jiantong and Mian, Ajmal}, title = {Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38979-38989} }
DeCo: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Zehong and Wei, Longhui and Wang, Shuai and Zhang, Shiliang and Tian, Qi}, title = {DeCo: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43600-43610} }
QuadSync: Quadrifocal Tensor Synchronization via Tucker Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2026_CVPR, author = {Miao, Daniel and Lerman, Gilad and Kileel, Joe}, title = {QuadSync: Quadrifocal Tensor Synchronization via Tucker Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28786-28795} }
RealVLG-R1: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Linfei and Zhang, Lin and Shen, Ying}, title = {RealVLG-R1: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42396-42407} }
Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Minh-Tuan and Le, Xuan-May and Tran, Quan Hung and Harandi, Mehrtash and Phung, Dinh and Le, Trung}, title = {Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37982-37991} }
LA-Pose: Latent Action Pretraining Meets Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhengqing and Nair, Saurabh and Chidananda, Prajwal and Kachana, Pujith and Li, Samuel and Brown, Matthew and Furukawa, Yasutaka}, title = {LA-Pose: Latent Action Pretraining Meets Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34460-34469} }
Motion-Aware Animatable Gaussian Avatars Deblurring-
[pdf]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Muyao and Zhan, Yifan and Zhu, Qingtian and Li, Zhuoxiao and Wang, Wei and Zhong, Zhihang and Sun, Xiao and Zheng, Yinqiang}, title = {Motion-Aware Animatable Gaussian Avatars Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40140-40151} }
FastEventDGS: Deformable Gaussian Splatting for Fast Dynamic Scenes from a Single Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Zijia and Messikommer, Nico and Zou, Rong and Zubic, Nikola and Scaramuzza, Davide and Kneip, Laurent}, title = {FastEventDGS: Deformable Gaussian Splatting for Fast Dynamic Scenes from a Single Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29537-29546} }
Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hao and Chen, Junyang and Pan, Jinshan and Dong, Jiangxin}, title = {Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30584-30594} }
ACE-Merging: Data-Free Model Merging with Adaptive Covariance Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Bo and Wu, Haotian and Lin, Hehai and Huang, Weiquan and Zhu, Beier and Shu, Yao and Qin, Chengwei}, title = {ACE-Merging: Data-Free Model Merging with Adaptive Covariance Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29472-29482} }
Hyperbolic Busemann Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ziheng and Sch{\"o}lkopf, Bernhard and Sebe, Nicu}, title = {Hyperbolic Busemann Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42029-42038} }
TTAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiaxiong and Tan, Zhen and Zhang, Jinpu and Zhou, Yi and Shen, Hui and Chen, Xieyuanli and Hu, Dewen}, title = {TTAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37053-37062} }
FVAR: Next-Focus Prediction for Visual Autoregressive Modeling-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaofan and Wu, Chenming and Sun, Yanpeng and Zhou, Jiaming and Qu, Delin and Qu, Yansong and Bo, Weihao and Yu, Haibao and Liang, Dingkang}, title = {FVAR: Next-Focus Prediction for Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30391-30401} }
OLATverse: A Large-scale Real-world Object Dataset with Precise Lighting Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xilong and Chen, Jianchun and Rao, Pramod and Teufel, Timo and Lyu, Linjie and Minasian, Tigran and Sotnychenko, Oleksandr and Long, Xiao-Xiao and Habermann, Marc and Theobalt, Christian}, title = {OLATverse: A Large-scale Real-world Object Dataset with Precise Lighting Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28848-28859} }
Landscape-Awareness for Geometric View Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yan-Ting and Chen, Hao-Wei and Hsiao, Tsu-Ching and Lee, Chun-Yi}, title = {Landscape-Awareness for Geometric View Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38475-38486} }
One Algorithm to Align Them All-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2026_CVPR, author = {Pang, Boyi and Ignatyev, Savva and Ippolitov, Vladimir and Khafizov, Ramil and Melnik, Yurii and Voynov, Oleg and Nakhodnov, Maksim and Alanov, Aibek and Fan, Xiaopeng and Wonka, Peter and Burnaev, Evgeny}, title = {One Algorithm to Align Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30446-30456} }
Beyond [CLS] Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Changshuo and Wang, Jiangming and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Wang, Shunli and Yi, Ran and Ma, Lizhuang}, title = {Beyond [CLS] Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42922-42931} }
Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Bowen and You, Sisi and Bao, Bing-Kun}, title = {Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29610-29619} }
RenderFlow: Single-Step Neural Rendering via Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shenghao and Liu, Runtao and Schroers, Christopher and Zhang, Yang}, title = {RenderFlow: Single-Step Neural Rendering via Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40602-40611} }
RDF-MIG: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Zian and Wei, Wei and Gao, Qingshan and Fu, Yuanyuan}, title = {RDF-MIG: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35758-35767} }
iLRM: An Iterative Large 3D Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Nam, Seungtae and Yang, Seungkwon and Sun, Xiangyu and Khamis, Sameh and Mohamed, Abdelrahman and Park, Eunbyung}, title = {iLRM: An Iterative Large 3D Reconstruction Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37332-37342} }
Unified Multimodal Models as Auto-Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Zhiyuan and Lin, Kaiqing and Li, Zongjian and Ye, Junyan and Han, Hui and Wang, Haochen and Wang, Zhendong and Lin, Bin and Li, Hao and Xiao, Xinyan and Wang, Jingdong and Wang, Haifeng and Yuan, Li}, title = {Unified Multimodal Models as Auto-Encoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41903-41912} }
Test-Time 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Fengyi and Sun, Xiangyu and Yang, Huitong and Zhang, Zheng and Huang, Zi and Luo, Yadan}, title = {Test-Time 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35691-35701} }
DGS: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kai and Li, Jiafeng and He, Lianghua and Wen, Ying}, title = {DGS: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32267-32277} }
Robo-SGG: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Changsheng and Fu, Zijian and Qi, Mengshi}, title = {Robo-SGG: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39282-39292} }
Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shuo and Tang, Xu and Ma, Jingjing and Zhang, Xiangrong}, title = {Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36989-36998} }
Adapting Lightweight Image-based Counting Models for Video Crowd Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Shu_2026_CVPR, author = {Shu, Weibo and Chan, Antoni B.}, title = {Adapting Lightweight Image-based Counting Models for Video Crowd Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35227-35237} }
Spot The Ball: A Benchmark for Visual Social Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Balamurugan_2026_CVPR, author = {Balamurugan, Neha and Wu, Sarah and Eyzaguirre, Cristobal and Gerstenberg, Tobias}, title = {Spot The Ball: A Benchmark for Visual Social Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30919-30928} }
VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xinlei and Xu, Chengming and Zhang, Guibin and Chen, Zhangquan and Zhang, Yudong and He, Yongbo and Jiang, Peng-Tao and Zhang, Jiangning and Hu, Xiaobin and Yan, Shuicheng}, title = {VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31544-31555} }
Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Wei and Meng, Xianghan and Huang, Zhiyuan and Qi, Xianbiao and Xiao, Rong and Li, Chun-Guang}, title = {Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39637-39646} }
4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yiting and Luo, Wei and Tu, Peiyan and Li, Haoran and Zhu, Hanxin and Yu, Zihao and Wang, Xingrui and Chen, Xinyi and Peng, Xinge and Li, Xin and Chen, Zhibo}, title = {4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34322-34332} }
TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Cheng and Zhou, Zimu and Ma, Ke and Guo, Bin}, title = {TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37010-37021} }
URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Ri and Chen, Zhao and Cao, Caleb Chen and Chen, Lei}, title = {URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32777-32786} }
FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yunlong and Deng, Xiaoheng and Xu, Hongyan and Qiu, Zhuohua and Hu, Xiaowen and You, Shan and Chen, Yi and Xu, Chang and Su, Xiu}, title = {FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39425-39434} }
Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yutao and Zhao, Cheng and Mittal, Gaurav and Kukkala, Rohith and Chellappa, Rama and Peng, Cheng and Chen, Mei}, title = {Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38584-38594} }
ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seminara_2026_CVPR, author = {Seminara, Luigi and Moltisanti, Davide and Furnari, Antonino}, title = {ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31240-31249} }
A Mixed Diet Makes DINO An Omnivorous Vision Encoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kabra_2026_CVPR, author = {Kabra, Rishabh and Ovsjanikov, Maks and Hudson, Drew A. and Xia, Ye and Koppula, Skanda and Araujo, Andre and Carreira, Joao and Mitra, Niloy J.}, title = {A Mixed Diet Makes DINO An Omnivorous Vision Encoder}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36850-36860} }
HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zihang and Liu, Zhu and Yan, Changbo and Liu, Jinyuan and Liu, Risheng}, title = {HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37434-37444} }
VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Swetha_2026_CVPR, author = {Swetha, Sirnam and Gupta, Rohit and Kulkarni, Parth Parag and Shatwell, David G. and Chan Santiago, Jeffrey A. and Siddiqui, Nyle and Fioresi, Joseph and Shah, Mubarak}, title = {VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32840-32849} }
ESAM++: Efficient Online 3D Perception on the Edge-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qin and Aggarwal, Lavisha and Bandyopadhyay, Saptarashmi and Bahirwani, Vikas and Niethammer, Marc and Adeli, Ehsan and Colaco, Andrea}, title = {ESAM++: Efficient Online 3D Perception on the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39021-39030} }
Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanchao and Zhang, Fang-Lue and Zhang, Shining and Mu, Tai-Jiang and Hu, Shi-Min}, title = {Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38291-38301} }
CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Xiefan and Ma, Xinzhu and Zhang, Haiyu and Huang, Di}, title = {CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43558-43567} }
StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Ke and Li, Longfei and Yin, Yuyang and Liang, Hanwen and Luo, Guixun and Fang, Chen and Wang, Jue and Plataniotis, Konstantinos N. and Jin, Xiaojie and Zhao, Yao and Wei, Yunchao}, title = {StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40245-40255} }
Low-Rank Test-Time Training for Pre-Trained Point Cloud Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Ouyangzi and Shao, Feifei and Li, Kexin and Luo, Yawei and Song, Zikai and Liu, Ping and Zhang, Fengda and Wang, Hongwei and Xiao, Jun}, title = {Low-Rank Test-Time Training for Pre-Trained Point Cloud Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31472-31481} }
AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haolin and Wang, Yaohua and Yan, Ze and Wen, Lijie and Huang, Biqing}, title = {AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41626-41635} }
Pixel2Phys: Distilling Governing Laws from Visual Dynamics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ruikun and Yao, Jun and Hua, Yingfan and Tang, Shixiang and Qi, Biqing and Liu, Bin and Ouyang, Wanli and Lu, Yan}, title = {Pixel2Phys: Distilling Governing Laws from Visual Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41426-41435} }
Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Tianci and Zhang, Xiaoyi and Lu, Yan and Zheng, Nanning}, title = {Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43310-43319} }
EVLF: Early Vision-Language Fusion for Generative Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Wenqi and Zou, Yawen and Li, Guang and Gu, Chunzhi and Zhang, Chao}, title = {EVLF: Early Vision-Language Fusion for Generative Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33953-33962} }
UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Hewen and Wei, Cong and Liang, Dashuang and Huang, Zepeng and Gao, Pengfei and Zhou, Ziqi and Xue, Lulu and Yan, Pengfei and Wei, Xiaoming and Li, Minghui and Hu, Shengshan}, title = {UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31833-31844} }
Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Hyundong and Han, Dongyoon and Kim, Eunwoo}, title = {Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32288-32298} }
SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Xiang and Wang, Xiangbo and Zhong, Tieshi and Wang, Chengkai and Zhao, Yiting and Xu, Tianxiang and Kuang, Zhenzhong and Qin, Feiwei and Yin, Xuefei and Zhu, Yanming}, title = {SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33384-33393} }
WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Wenhua and Guan, Huai and Liu, Zhe and Wang, Hesheng}, title = {WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40949-40958} }
TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Junyoung and Oh, Youngjin and Cho, Nam Ik}, title = {TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29877-29886} }
SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Yip_2026_CVPR, author = {Yip, Koonting and Zhao, Qiyan and Yu, Wenhao and Yuen, Liangyu and Li, Mingkai and Zhang, Xiaofeng and Ji, Jianmin and Zhang, Yanyong and Jiang, Qing and Yuen, Ka-Veng}, title = {SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33714-33726} }
Forecasting 3D Scanpaths in Egocentric Video-
[pdf]
[supp]
[bibtex]@InProceedings{Ryan_2026_CVPR, author = {Ryan, Fiona and Ananthabhotla, Ishwarya and Qian, Yijun and Hoffman, Judy and Rehg, James M. and Ithapu, Vamsi Krishna and Murdock, Calvin}, title = {Forecasting 3D Scanpaths in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42824-42835} }
SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhakal_2026_CVPR, author = {Dhakal, Aayush and Khanal, Subash and Sastry, Srikumar and Arndt, Jacob and Dias, Philipe and Lunga, Dalton and Jacobs, Nathan}, title = {SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35472-35482} }
Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiayang and Chen, Xinyang and Lv, Ke and Guan, Weili}, title = {Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29347-29356} }
Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Xiangyu and Qiu, Zesong and Wu, Zhuguanyu and Wang, Fanzhou and Lin, Zhiqian and Ren, Tianxiang and Lin, Dahua and Gong, Ruihao and Yang, Lei}, title = {Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41667-41676} }
FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Weijie and Yang, Ming-Hsuan and Shu, Zhixin}, title = {FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30607-30617} }
ORION: ORthonormal Text Encoding for Universal VLM AdaptatION-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chakraborty_2026_CVPR, author = {Chakraborty, Omprakash and Dolz, Jose and Ben Ayed, Ismail}, title = {ORION: ORthonormal Text Encoding for Universal VLM AdaptatION}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31556-31565} }
Mobile-VTON: High-Fidelity On-Device Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Zhenchen and Chen, Ce and Lin, Runqi and Huang, Jiaxin and Chen, Tianxi and Xu, Yanwu and Liu, Tongliang and Gong, Mingming}, title = {Mobile-VTON: High-Fidelity On-Device Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38081-38090} }
RINO: Rotation-Invariant Non-Rigid Correspondences-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Maolin and Hu-Chen, Shao Jie and Deng, Congyue and Marin, Riccardo and Guibas, Leonidas and Cremers, Daniel}, title = {RINO: Rotation-Invariant Non-Rigid Correspondences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34683-34693} }
IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Hao and Luo, Xiangyang and Wang, Hao and Zhang, Jiawei and Zhang, Yi and Wang, Jinwei}, title = {IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32967-32977} }
DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Yao, Yuxin and Lu, Qiuhang and Zhang, Juyong}, title = {DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39031-39041} }
AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping}, title = {AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34148-34159} }
TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Boshen and Xiao, Zihan and Li, Jiaze and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian and Jin, Qin}, title = {TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40479-40493} }
EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Wenjing and Yang, Chuanguang and An, Zhulin and Huang, Libo and Diao, Boyu and Xu, Yongjun}, title = {EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33738-33748} }
MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2026_CVPR, author = {Mu, Wenjie and Li, Zhan and Su, Chuanzhou and Shen, Xuanyi and Liu, Ziniu and Lu, Fan and Mo, Yujian and Zhao, Junqiao and Feng, Tiantian and Ye, Chen and Chen, Guang}, title = {MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38049-38059} }
VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Sinan and Guo, Jiahao and Li, Bo and Cui, Shuhao and Xu, Zhengzhuo and Luo, Yifu and Wei, Yongxian and Gai, Kun and Wang, Xinggang and Wu, Kai and Yuan, Chun}, title = {VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30322-30334} }
Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shufan and Gu, Jiuxiang and Liu, Kangning and Lin, Zhe and Wei, Zijun and Grover, Aditya and Kuen, Jason}, title = {Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36103-36114} }
FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2026_CVPR, author = {Bian, Congcong and Ma, Haolong and Li, Hui and Shen, Zhongwei and Luo, Xiaoqing and Song, Xiaoning and Wu, Xiao-jun}, title = {FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41551-41561} }
Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Wenxuan and Wang, Chenglei and Yan, Chengzhi and Qian, Xuelin and Zhang, Yanning}, title = {Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34908-34917} }
H^2A^2: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Tao and An, Tao and Liu, Feng and Jin, Wensheng and Li, Zhengyu and Zhao, Lijun and Li, Ruifeng}, title = {{H$^2$A$^2$}: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40717-40726} }
Mirai: Autoregressive Visual Generation Needs Foresight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yonghao and Huang, Lang and Wang, Zerun and Li, Runyi and Yamasaki, Toshihiko}, title = {Mirai: Autoregressive Visual Generation Needs Foresight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30511-30520} }
EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Lu and Nakashima, Yuta and Garcia, Noa}, title = {EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37247-37257} }
Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bian_2026_CVPR, author = {Bian, Zhangxing and Wei, Shuwen and Remedios, Samuel W. and Chen, Junyu and Carass, Aaron and Dewey, Blake and Prince, Jerry L.}, title = {Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41730-41740} }
Spatiotemporal Pyramid Flow Matching for Climate Emulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Irvin_2026_CVPR, author = {Irvin, Jeremy A. and Han, Jiaqi and Wang, Zikui and Alharbi, Abdulaziz and Zhao, Yufei and Bayarsaikhan, Nomin-Erdene and Visioni, Daniele and Ng, Andrew Y. and Watson-Parris, Duncan}, title = {Spatiotemporal Pyramid Flow Matching for Climate Emulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42180-42190} }
Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors-
[pdf]
[supp]
[bibtex]@InProceedings{Moghadam_2026_CVPR, author = {Moghadam, Puria Azadi and Mirabadi, Ali Khajegili and Maneshgar, Behnam and Farahani, Hossein and Bashashati, Ali}, title = {Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34095-34105} }
Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Kunwar Maheep and Chen, Jianchun and Golyanik, Vladislav and Garbin, Stephan J. and Beeler, Thabo and Dabral, Rishabh and Habermann, Marc and Theobalt, Christian}, title = {Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28882-28892} }
Match-and-Fuse: Consistent Generation from Unstructured Image Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feingold_2026_CVPR, author = {Feingold, Kate and Kaduri, Omri and Dekel, Tali}, title = {Match-and-Fuse: Consistent Generation from Unstructured Image Sets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30490-30499} }
Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Yaowen and Cao, Zhen and Zheng, Xu and Mi, Xiaoxin and Dong, Zhen}, title = {Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32226-32235} }
tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chen and Tan, Hao and Wang, Yifan and Chen, Zhiqin and Liu, Yuheng and Sunkavalli, Kalyan and Bi, Sai and Liu, Lingjie and Hu, Yiwei}, title = {tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36582-36592} }
Designing Instance-Level Sampling Schedules via REINFORCE with James-Stein Shrinkage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Peiyu and Kothawade, Suraj and Xie, Sirui and Wu, Ying Nian and Fei, Hongliang}, title = {Designing Instance-Level Sampling Schedules via REINFORCE with James-Stein Shrinkage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36040-36050} }
MVInverse: Feed-forward Multiview Inverse Rendering in Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiangzuo and Ren, Chengwei and Zhou, Jun and Li, Xiu and Liu, Yuan}, title = {MVInverse: Feed-forward Multiview Inverse Rendering in Seconds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37343-37357} }
MM-ACT: Learn from Multimodal Parallel Generation to Act-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Haotian and Chen, Xinyi and Wang, Bin and Chen, Mingkang and Liu, Yitian and Zhang, Yuhao and Chen, Zanxin and Yang, Tianshuo and Chen, Yilun and Pang, Jiangmiao and Liu, Dong and Yang, Xiaokang and Mu, Yao and Shao, Wenqi and Luo, Ping}, title = {MM-ACT: Learn from Multimodal Parallel Generation to Act}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35080-35090} }
CountGD++: Generalized Prompting for Open-World Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Amini-Naieni_2026_CVPR, author = {Amini-Naieni, Niki and Zisserman, Andrew}, title = {CountGD++: Generalized Prompting for Open-World Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37725-37734} }
Interpretable Debiasing of Vision-Language Models for Social Fairness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Na Min and Jang, Yoonna and Hirota, Yusuke and Hachiuma, Ryo and Augenstein, Isabelle and Shim, Hyunjung}, title = {Interpretable Debiasing of Vision-Language Models for Social Fairness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39326-39337} }
D3D-VLP: Dynamic 3D Vision-Language-Planning Model for Embodied Grounding and Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zihan and Lee, Seungjun and Dai, Guangzhao and Lee, Gim Hee}, title = {D3D-VLP: Dynamic 3D Vision-Language-Planning Model for Embodied Grounding and Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32463-32474} }
Property-Informed Diffusion-Based Text-to-Microstructure Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Bingxuan and Wang, Hongsong and Gui, Jie}, title = {Property-Informed Diffusion-Based Text-to-Microstructure Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36758-36768} }
Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Liangyu and Huang, Yufei and Lei, Mingkun and Zhao, Tong and Wang, Ruoyu and Chi, Changxi and Wang, Yiwei and Zhang, Chi}, title = {Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43697-43706} }
IDperturb: Enhancing Variation in Synthetic Face Generation via Angular Perturbations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boutros_2026_CVPR, author = {Boutros, Fadi and Caldeira, Eduarda and Chettaoui, Tahar and Damer, Naser}, title = {IDperturb: Enhancing Variation in Synthetic Face Generation via Angular Perturbations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40119-40129} }
ReFlow: Self-correction Motion Learning for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Yanzhe and Zhu, Ruijie and Chang, Hanzhi and Li, Zhuoyuan and Lu, Jiahao and Zhang, Tianzhu}, title = {ReFlow: Self-correction Motion Learning for Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29803-29813} }
Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Maijie and Li, Yuhua and Zou, Yixiong and Wu, Yao and Ma, Chenru}, title = {Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33642-33651} }
SimRecon: SimReady Compositional Scene Reconstruction from Real Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Chong and Zhu, Kai and Wang, Zizhuo and Liu, Fangfu and Zhang, Zhizheng and Duan, Yueqi}, title = {SimRecon: SimReady Compositional Scene Reconstruction from Real Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42452-42463} }
From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2026_CVPR, author = {Ryu, Taehun and Kang, Changwoo and Joo, Kyungdon}, title = {From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29557-29566} }
See and Fix the Flaws: Enabling VLMs and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jaehyun and Ahn, Minyoung and Kim, Minkyu and Lee, Jonghyun and Lee, Jae-Gil and Park, Dongmin}, title = {See and Fix the Flaws: Enabling VLMs and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35810-35820} }
DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Junjia and Yang, Binbin and Yan, Pengxiang and Liu, Jiyang and Xia, Bin and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin}, title = {DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36736-36746} }
Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with MeasureBench-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Fenfen and Liu, Yesheng and Xu, Haiyu and Chen, Yue and He, Zheqi and Zhao, Mingxuan and Chen, Miguel Hu and Yao, Jin-Ge and Yang, Xi}, title = {Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with MeasureBench}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38544-38553} }
Compressed-Domain-Aware Online Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuhang and Li, Hai and Hou, Shujuan and Dong, Zhetao and Yang, Xiaoyao}, title = {Compressed-Domain-Aware Online Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33621-33631} }
SVBench: Evaluation of Video Generation Models on Social Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Wenshuo and Wang, Gongxuan and Yang, Tianmeng and Li, Chuanhao and Xu, Xiaojie and He, Hui and Zhang, Kaipeng}, title = {SVBench: Evaluation of Video Generation Models on Social Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32872-32881} }
TESO: Online Tracking of Essential Matrix by Stochastic Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moravec_2026_CVPR, author = {Moravec, Jaroslav and Sara, Radim and Sugimoto, Akihiro}, title = {TESO: Online Tracking of Essential Matrix by Stochastic Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28819-28827} }
AGENTSAFE: Benchmarking the Safety of Embodied Agents on Hazardous Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2026_CVPR, author = {Ying, Zonghao and Wang, Le and Xiao, Yisong and Wang, Jiakai and Ma, Yuqing and Guo, Jinyang and Yin, Zhenfei and Zhang, Mingchuan and Liu, Aishan and Liu, Xianglong}, title = {AGENTSAFE: Benchmarking the Safety of Embodied Agents on Hazardous Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37664-37673} }
DiT-Distill: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xin and Tang, Hao and Cao, Meiqi and Gao, Junyao and Shen, Fei and Li, Zechao}, title = {DiT-Distill: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38753-38762} }
Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yiming and You, Sisi and Bao, Bing-Kun}, title = {Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32206-32215} }
GP-4DGS: Probabilistic 4D Gaussian Splatting from Monocular Video via Variational Gaussian Processes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Mijeong and Kim, Jungtaek and Han, Bohyung}, title = {GP-4DGS: Probabilistic 4D Gaussian Splatting from Monocular Video via Variational Gaussian Processes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33394-33403} }
Language-guided Frequency Modulation for Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Shuyi and Fang, Gongfan and Ma, Xinyin and Chen, Yen-Wei and Lin, Lanfen and Wang, Xinchao}, title = {Language-guided Frequency Modulation for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39175-39185} }
Masked Representation Modeling for Domain-Adaptive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Wenlve and Zhou, Zhiheng and Xian, Tiantao and Zhai, Yikui and Wu, Weibin and Ma, Biyun}, title = {Masked Representation Modeling for Domain-Adaptive Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36999-37009} }
Hear What Matters! Text-conditioned Selective Video-to-Audio Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junwon and Nam, Juhan and Lee, Jiyoung}, title = {Hear What Matters! Text-conditioned Selective Video-to-Audio Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36680-36690} }
Exploring 6D Object Pose Estimation with Deformation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiqiang and Song, Rui and Duanmu, Chuangqi and Li, Jiaojiao and Ferstl, David and Hu, Yinlin}, title = {Exploring 6D Object Pose Estimation with Deformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33078-33087} }
DRS-GUI: Dynamic Region Search for Training-Free GUI Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yichao and Shen, Huawen and Yu, Liu and Liu, Shiyu and Chen, Zeyu and Zhou, Yu}, title = {DRS-GUI: Dynamic Region Search for Training-Free GUI Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34606-34616} }
DiverseDiT: Towards Diverse Representation Learning in Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Mengping and Tan, Zhiyu and Li, Binglei and Yang, Xiaomeng and Chen, Hesen and Li, Hao}, title = {DiverseDiT: Towards Diverse Representation Learning in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40591-40601} }
SCIEval: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Guanghui and Zhao, Huan and Zhao, Zhixue and Ma, Tengfei and Wang, Kehan and Eger, Steffen and Jiang, Zhihua}, title = {SCIEval: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29760-29770} }
SemLayer: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Haiyang and Wu, Ronghuan and Wei, Li-Yi and Zhao, Nanxuan and Liu, Chenxi and Nguyen, Cuong and Tu, Zhuowen and Wang, Zhaowen}, title = {SemLayer: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42082-42092} }
Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Lu and Xue, Cheng}, title = {Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41646-41656} }
GH-NAF: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware CBCT-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Seong Je and Lee, Ju Hwan and Lim, Chae Yeon and Lee, Donghwan and Chung, Myung Jin and Kim, Kyungsu}, title = {GH-NAF: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware CBCT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41762-41772} }
ActionMesh: Animated 3D Mesh Generation with Temporal 3D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sabathier_2026_CVPR, author = {Sabathier, Remy and Novotny, David and Mitra, Niloy J. and Monnier, Tom}, title = {ActionMesh: Animated 3D Mesh Generation with Temporal 3D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34312-34321} }
Towards Intrinsic-Aware Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhihao and Kumar, Abhinav and Liu, Xiaoming}, title = {Towards Intrinsic-Aware Monocular 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40739-40750} }
Multimodal RewardBench 2: Evaluating Omni Reward Models for Interleaved Text and Image-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yushi and Askari-Hemmat, Reyhane and Hall, Melissa and Dinan, Emily and Zettlemoyer, Luke and Ghazvininejad, Marjan}, title = {Multimodal RewardBench 2: Evaluating Omni Reward Models for Interleaved Text and Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36904-36915} }
Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tianyi and Lu, Ye and Zhang, Linfeng and Cai, Chen and Gao, Jianjun and Wang, Yi and Yap, Kim-Hui and Chau, Lap-Pui}, title = {Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35800-35809} }
Plug-and-Play PDE Optimization for 3D Gaussian Splatting: Toward High-Quality Rendering and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2026_CVPR, author = {Mo, Yifan and Cai, Youcheng and Liu, Ligang}, title = {Plug-and-Play PDE Optimization for 3D Gaussian Splatting: Toward High-Quality Rendering and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33333-33342} }
UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zhuangcheng and Liang, Guang and Wang, Bin and Zhao, Zhiyuan and Zhang, Qintong and Li, Weijia and Xu, Chao and Zhang, Bo and Shi, Botian and Wu, Jiang and Zhang, Wentao and He, Conghui}, title = {UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34106-34115} }
Fourier Angle Alignment for Oriented Object Detection in Remote Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Changyu and Chen, Linwei and Gu, Lin and Fu, Ying}, title = {Fourier Angle Alignment for Oriented Object Detection in Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42225-42235} }
SimScale: Learning to Drive via Real-World Simulation at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Haochen and Li, Tianyu and Liu, Haochen and Yang, Jiazhi and Qiu, Yihang and Li, Guang and Wang, Junli and Gao, Yinfeng and Zhang, Zhang and Wang, Liang and Ye, Hangjun and Chen, Long and Li, Hongyang}, title = {SimScale: Learning to Drive via Real-World Simulation at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36365-36374} }
Unified Customized Generation by Disentangled Reward Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shaojin and Huang, Mengqi and Cheng, Yufeng and Wu, Wenxu and Tian, Jiahe and Luo, Yiming and Ding, Fei and He, Qian}, title = {Unified Customized Generation by Disentangled Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34418-34427} }
AeroGS: Scale-Aware Gaussian Splatting for Pose-Free Dynamic UAV Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tingyun and Liu, Xinyi and Zhang, Yongjun and Wan, Yi and Liu, Xiaoan and Fan, Weiwei and Liu, Jiahao}, title = {AeroGS: Scale-Aware Gaussian Splatting for Pose-Free Dynamic UAV Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40907-40917} }
Learning to Control Physically-simulated 3D Characters via Generating and Mimicking 2D Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jianan and Chen, Xiao and Huang, Tao and Wong, Tien-Tsin}, title = {Learning to Control Physically-simulated 3D Characters via Generating and Mimicking 2D Motions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38302-38312} }
DuoMo: Dual Motion Diffusion for World-Space Human Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yufu and Ng, Evonne and Shin, Soyong and Khirodkar, Rawal and Dong, Yuan and Su, Zhaoen and Park, Jinhyung and Kitani, Kris and Richard, Alexander and Prada, Fabian and Zollh{\"o}fer, Michael}, title = {DuoMo: Dual Motion Diffusion for World-Space Human Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42777-42788} }
Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jianrong and Fan, Hehe and Yang, Yi}, title = {Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30650-30660} }
TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Haowei and Huang, Tingxuan and Wang, Xing and Zhao, Tianyu and Wang, Jiexi and Chen, Weifeng and Peng, Xurui and Chen, Fangmin and Yong, Junhai and Wang, Bin}, title = {TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36082-36091} }
Debiased Sample Selection for Learning with Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Weiran and Wei, Wei and Xie, Wenfeng}, title = {Debiased Sample Selection for Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32047-32057} }
SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Huanjing and Xie, Shangbin and Cao, Cong and Wu, Qian and Zhang, Lei and Zhao, Lei and Yang, Jingyu}, title = {SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38453-38463} }
Inter-Photon-Limited Videography-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Andrew and Du, Dongyu and Nousias, Sotiris and Lindell, David B. and Kutulakos, Kiriakos N.}, title = {Inter-Photon-Limited Videography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34006-34015} }
GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuqin and Wu, Tao and Zhang, Yanfeng and Liu, Lu and Sun, Mingwei and Wang, Yongliang and Zeller, Niclas and Cremers, Daniel}, title = {GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40316-40326} }
InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Que_2026_CVPR, author = {Que, Xiaofan and Wang, Dingrong and Liu, Xumin and Yu, Qi}, title = {InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36936-36946} }
Mixture of States: Routing Token-Level Dynamics for Multimodal Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Haozhe and Liu, Ding and Zhuge, Mingchen and Zhou, Zijian and Xie, Tian and He, Sen and Yang, Yukang and Liu, Shuming and Cong, Yuren and Guo, Jiadong and Xu, Hongyu and Xu, Ke and Ng, Kam-Woh and Perez, Juan C. and Perez-Rua, Juan-Manuel and Xiang, Tao and Liu, Wei and Liu, Shikun and Schmidhuber, J{\"u}rgen}, title = {Mixture of States: Routing Token-Level Dynamics for Multimodal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36781-36792} }
Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yujin and Kim, Sewon and Moon, Daeun and Jang, Seoyoon and Yoon, Hyunsoo}, title = {Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35577-35586} }
LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Haoyu and Liu, Xueting and Gao, Yu and Huang, Wenze and Yang, Zhihao and Ren, Weihong and Wang, Zhiyong and Liu, Honghai}, title = {LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34449-34459} }
GenTract: Generative Global Tractography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargood_2026_CVPR, author = {Sargood, Alec and Puglisi, Lemuel and Thompson, Elinor and Musolesi, Mirco and Alexander, Daniel C.}, title = {GenTract: Generative Global Tractography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35291-35300} }
ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Tianyu and He, ChenWei and Hao, Xiangzhao and Wang, Tianyue and Guo, Jiarui and Guo, Haiyun and Qu, Leigang and Wang, Jinqiao and Chua, Tat-Seng}, title = {ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38763-38773} }
Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Jinbo and Jiang, Zeyinzi and Tuo, Yuxiang and Mao, Chaojie and Gai, Xiaotang and Chen, Xi and Zhang, Jingfeng and Pan, Yulin and Han, Zhen and Xiao, Jie and Yan, Keyu and Xie, Chenwei and Zhong, Chongyang and Zhu, Kai and Shen, Tong and Huang, Lianghua and Liu, Yu and Yang, Yujiu}, title = {Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36267-36278} }
TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Qianlong and Zhang, Miao and Zhang, Haoyu and Wang, Kun and Hou, Junhui and Nie, Liqiang}, title = {TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30076-30086} }
MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiahao and Huang, Sheng and Zhang, Xin and Nan, Zhixiong and Dong, Jiajun and Mu, Nankun}, title = {MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33911-33921} }
Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Zhanhe and Wang, Zhongyuan and Cheng, Jikang and Huang, Baojin and Yang, Yuhong and Han, Zhen and Liang, Chao and Ye, Dengpan}, title = {Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41436-41445} }
WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ibn_Sultan_2026_CVPR, author = {Ibn Sultan, Rafi and Zhu, Hui and Zhou, Xiangyu and Li, Chengyin and Khanduri, Prashant and Brocanelli, Marco and Zhu, Dongxiao}, title = {WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40085-40095} }
DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Gaowei and Zhang, Lihe}, title = {DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35524-35533} }
Minimal Constraint Relaxation for Multiview Autocalibration-
[pdf]
[supp]
[bibtex]@InProceedings{Kosaka_2026_CVPR, author = {Kosaka, Norio and Duff, Timothy and Pajdla, Tomas}, title = {Minimal Constraint Relaxation for Multiview Autocalibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28937-28946} }
SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Tong and Fu, Yusen and Sun, Guoying and Kong, Jingde and Tian, Zhuotao and Su, Jingyong}, title = {SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33012-33021} }
PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Suyi and Lee, Gim Hee}, title = {PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32507-32517} }
CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingfang and Pan, Jingjing and Kumar, Ashutosh and Saini, Rajat and Erdogan, Mustafa and Yang, Hsuan-Kung and Kang, Caixin and Huang, Yifei and Sato, Yoichi and Kong, Quan}, title = {CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31856-31866} }
TTRV: Test-Time Reinforcement Learning for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Akshit and Marjit, Shyam and Lin, Wei and Gavrikov, Paul and Yeung-Levy, Serena and Kuehne, Hilde and Feris, Rogerio and Doveh, Sivan and Glass, James and Mirza, M. Jehanzeb}, title = {TTRV: Test-Time Reinforcement Learning for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33153-33163} }
CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tingjia and Chen, Bo and Liu, Shengzhong and Wu, Fan and Chen, Guihai}, title = {CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37391-37400} }
SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haoning and Huang, Xiao and Chen, Yaohui and Zhang, Ya and Wang, Yanfeng and Xie, Weidi}, title = {SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31029-31041} }
Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Kang and Ding, Yuzhe and Wang, Xinrong and Li, Fei and Teng, Chong and Ji, Donghong}, title = {Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30183-30193} }
SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Shuai and Gong, Biao and Wei, Yujie and Zhang, Shiwei and Liu, Zhuoxin and Ma, Ke and Wang, Yan and Zheng, Kecheng and Zhu, Xing and Shen, Yujun and Zhao, Hengshuang}, title = {SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30477-30489} }
EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiahao and Zhang, Zihui and Yang, Yafei and Li, Jinxi and Wei, Shenxing and Sun, Zhixuan and Yang, Bo}, title = {EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39817-39826} }
Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Bo and Chen, Junxi and Wu, Zhe and Gao, Feng and Yang, Fan and Su, Li and Wang, Yaowei}, title = {Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35638-35647} }
Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fan and Zhao, Yuanzhi and Zhao, Haimei and Zhao, Yudong and Xu, Haikun}, title = {Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30151-30161} }
Grounding Everything in Tokens for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Xiangxuan and Wang, Zhongdao and Hou, Liping and Tang, Pin and Wang, Guoqing and Ma, Chao}, title = {Grounding Everything in Tokens for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41171-41181} }
Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zitong and Zhang, Kaidong and Ding, Yukang and Gao, Chao and Ding, Rui and Chen, Ying and Zuo, Wangmeng}, title = {Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35998-36008} }
Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shumeng and Guo, Jintao and Zhang, Jian and Zhou, Yulin and Cao, Luyang and Shi, Yinghuan}, title = {Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42722-42731} }
Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Siyu and Wang, Zijian and Wang, Yunke and Xia, Chenghao and Huang, Tao and Xu, Chang}, title = {Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37206-37215} }
CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Houji and Yu, Jiangyong and Yang, Dawei and Li, Jun}, title = {CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33632-33641} }
Evidential Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Ruxiao and Wong, Alex}, title = {Evidential Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28632-28641} }
ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hojjat_2026_CVPR, author = {Hojjat, Ali and Haberer, Janek and Pirk, S{\"o}ren and Landsiedel, Olaf}, title = {ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41923-41933} }
Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Joonhyung and Jang, Hyeongwon and Kim, Joowon and Yang, Eunho}, title = {Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38091-38100} }
DreamStyle: A Unified Framework for Video Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengtian and Chen, Jinshu and Zhao, Songtao and Feng, Wanquan and Tu, Pengqi and He, Qian}, title = {DreamStyle: A Unified Framework for Video Stylization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36019-36029} }
Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Mianzhao and Shi, Fan and Cheng, Xu and Jia, Chen and Chen, Shengyong}, title = {Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41489-41498} }
Texvent: Asynchronous Event Data Simulation via Text Prompt-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ruofei and Duan, Peiqi and Cheung, Ka Chun and See, Simon and Shi, Boxin and Wan, Renjie}, title = {Texvent: Asynchronous Event Data Simulation via Text Prompt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36375-36384} }
Coupled Diffusion Sampling for Training-Free Multi-View Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alzayer_2026_CVPR, author = {Alzayer, Hadi and Zhang, Yunzhi and Geng, Chen and Huang, Jia-Bin and Wu, Jiajun}, title = {Coupled Diffusion Sampling for Training-Free Multi-View Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43686-43696} }
Video Panels for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Doorenbos_2026_CVPR, author = {Doorenbos, Lars and Spurio, Federico and Gall, Juergen}, title = {Video Panels for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31293-31303} }
Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Songtao and Song, Sibo and Zhou, Chenyi and Wang, Yuan and Chen, Ruizhe and Guan, Tongkun and Luo, Ruilin and Zhang, Yan and Tang, Zhihang and Sun, Yuchong and Zhang, Hang and Yang, Zhibo and Bai, Shuai and Lin, Junyang and Liu, Zuozhu}, title = {Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31283-31292} }
Understanding, Accelerating, and Improving MeanFlow Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jin-Young and Go, Hyojun and Bogensperger, Lea and Erbach, Julius and Kalischek, Nikolai and Tombari, Federico and Schindler, Konrad and Narnhofer, Dominik}, title = {Understanding, Accelerating, and Improving MeanFlow Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37992-38003} }
VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gavrikov_2026_CVPR, author = {Gavrikov, Paul and Lin, Wei and Mirza, M. Jehanzeb and Jahagirdar, Soumya and Huzaifa, Muhammad and Doveh, Sivan and Glass, James and Yeung-Levy, Serena and Kuehne, Hilde}, title = {VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40833-40844} }
STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Hao and Bi, Zetong and Zeng, Yiming and Wan, Zhaoliang and Qi, Lu and Cheng, Hui}, title = {STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42464-42473} }
From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Gaoge and Cheng, Yongkang and Chen, Zhe and Huang, Shaoli and Liu, Tongliang}, title = {From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42800-42809} }
Video-CoE: Reinforcing Video Event Prediction via Chain of Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Qile and Tang, Jing and Chen, Rui and Sun, Lei and Chu, Xiangxiang}, title = {Video-CoE: Reinforcing Video Event Prediction via Chain of Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32818-32828} }
FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Aro and Jang, Myeongjin and Moon, Chaewon and Shin, Youngjin and Jeong, Jinwoo and Park, Sang-hyo}, title = {FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38270-38280} }
QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jingxuan and Hsieh, Yunta and Wan, Zhongwei and Lin, Haokun and Wang, Xin and Wang, Ziqi and Lei, Yingtie and Zhang, Mi}, title = {QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39539-39549} }
ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Yuxi and Yuan, Qiuyang and Zhong, Zhizhou and Zhao, Xuan and Zhou, Jiaogen and Zhu, Fubao and Guan, Jihong and Zhou, Shuigeng}, title = {ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28838-28847} }
Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{VCR_2026_CVPR, author = {VCR, Sairam and Lalla, Rishabh and Dayal, Aveen and Kulkarni, Tejal and Lalla, Anuj and Balasubramanian, Vineeth N. and Khan, Muhammad Haris}, title = {Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29431-29440} }
Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yufeng and Chen, Lei and Zeng, Zhixiong and Zhao, Xuanle and Jiang, Deyang and Zheng, Liming and Huang, Jing and Qiu, Haibo and Shi, Peng and Yang, Siqi and Ma, Lin}, title = {Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33164-33173} }
$\phi$-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Truong_2026_CVPR, author = {Truong, Thanh-Dat and Tran, Huu-Thien and Cothren, Jackson and Raj, Bhiksha and Luu, Khoa}, title = {{$\phi$}-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39922-39934} }
PhyCo: Learning Controllable Physical Priors for Generative Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Narayanan_2026_CVPR, author = {Narayanan, Sriram and Jiang, Ziyu and Narasimhan, Srinivasa and Chandraker, Manmohan}, title = {PhyCo: Learning Controllable Physical Priors for Generative Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41892-41902} }
Learning Straight Flows: Variational Flow Matching for Efficient Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Chenrui and Xiao, Xi and Wang, Tianyang and Wang, Xiao and Shen, Yanning}, title = {Learning Straight Flows: Variational Flow Matching for Efficient Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38154-38164} }
DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Fangtai and Liu, Mushui and He, Weijie and Wang, Zhao and Yu, Yunlong}, title = {DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29265-29274} }
Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2026_CVPR, author = {Tong, Jingqi and Mou, Yurong and Li, Hangcheng and Li, Mingzhe and Yang, Yongzhuo and Zhang, Ming and Chen, Qiguang and Liang, Tianyi and Hu, Xiaomeng and Zheng, Yining and Chen, Xinchi and Zhao, Jun and Huang, Xuanjing and Qiu, Xipeng}, title = {Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41121-41129} }
ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Panwang and Zhao, Jingjing and Lin, Yuchen and Lin, Chenguo and Li, Chenxin and Liu, Hengyu and Shen, Tingting and Mu, Yadong}, title = {ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36627-36637} }
Order Matters: 3D Shape Generation from Sequential VR Sketches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yizi and Wu, Sidi and Xiao, Tianyi and Wiedemann, Nina and Landrieu, Loic}, title = {Order Matters: 3D Shape Generation from Sequential VR Sketches}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34259-34269} }
DF^2-VB: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification-
[pdf]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yuena and Cai, Haichun and Shan, Yi and Wei, Hao and Deng, Yongjian and Yang, Zhen and Lyu, Gengyu}, title = {{DF$^2$-VB}: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33859-33868} }
GeoSURGE: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Daruna_2026_CVPR, author = {Daruna, Angel and Meegan, Nicholas and Chiu, Han-Pang and Samarasekera, Supun and Kumar, Rakesh}, title = {GeoSURGE: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41396-41405} }
Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Hulingxiao and Tan, Zhi and Peng, Yuxin}, title = {Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31124-31134} }
WorldLens: Full-Spectrum Evaluations of Driving World Models in Real World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Ao and Kong, Lingdong and Yan, Tianyi and Liu, Hongsi and Yang, Yu and Huang, Ziqi and Yin, Wei and Zuo, Jialong and Hu, Yixuan and Zhu, Dekai and Lu, Dongyue and Liu, Youquan and Jiang, Guangfeng and Li, Linfeng and Li, Xiangtai and Zhuo, Long and Ng, Lai Xing and Cottereau, Benoit R. and Gao, Changxin and Pan, Liang and Ooi, Wei Tsang and Liu, Ziwei}, title = {WorldLens: Full-Spectrum Evaluations of Driving World Models in Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36385-36399} }
DiffGraph: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhuoling and Rahmani, Hossein and Zhang, Jiarui and Xue, Yu and Mirmehdi, Majid and Kuen, Jason and Gu, Jiuxiang and Liu, Jun}, title = {DiffGraph: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36713-36723} }
Tracking-Guided 4D Generation: Foundation-Tracker Motion Priors for 3D Model Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Su and Zhao, Cheng and Mittal, Himangi and Mittal, Gaurav and Kukkala, Rohith and Chen, Yingjie Victor and Chen, Mei}, title = {Tracking-Guided 4D Generation: Foundation-Tracker Motion Priors for 3D Model Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40992-41001} }
Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Piedade_2026_CVPR, author = {Piedade, Valter and Manam, Lalit and Yamazaki, Masashi and Miraldo, Pedro}, title = {Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28925-28936} }
RobustVisRAG: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, I-Hsiang and Liu, Yu-Wei and Wu, Tse-Yu and Chiang, Yu-Chien and Yang, Jen-Chieh and Chen, Wei-Ting}, title = {RobustVisRAG: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40802-40811} }
Edit2Perceive: Image Editing Diffusion Models Are Strong Dense Perceivers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yiqing and Song, Yiren and Shou, Mike Zheng}, title = {Edit2Perceive: Image Editing Diffusion Models Are Strong Dense Perceivers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43568-43577} }
SWIFT: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chao and Yang, Zijin and Wang, Yaofei and Qi, Yuang and Zhang, Weiming and Yu, Nenghai and Chen, Kejiang}, title = {SWIFT: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31725-31734} }
Mapping Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sen_2026_CVPR, author = {Sen, Lord and Mukherjee, Shyamapada}, title = {Mapping Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36215-36223} }
MotionMaster: Generalizable Text-Driven Motion Generation and Editing-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Nan and Li, Yunhao and Pang, Lexi and He, Zimo and Huang, Siyuan and Zhu, Yixin}, title = {MotionMaster: Generalizable Text-Driven Motion Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30629-30639} }
LATA: Laplacian-Assisted Transductive Adaptation for Conformal Uncertainty in Medical VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bozorgtabar_2026_CVPR, author = {Bozorgtabar, Behzad and Mahapatra, Dwarikanath and Roy, Sudipta and Naseer, Muzammal and Razzak, Imran and Ge, Zongyuan}, title = {LATA: Laplacian-Assisted Transductive Adaptation for Conformal Uncertainty in Medical VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36311-36320} }
Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tingyu and Sun, Zheng and Wei, Jingxuan and He, Conghui and Wu, Lijun and Tan, Cheng}, title = {Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29357-29366} }
MORE-STEM: Long-Short MemOry REcall and Spatio-TEmporal Consistency Model for Query-Driven 3D/4D Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chade and Feng, Haida and Zhang, Pengju and Wu, Yihong}, title = {MORE-STEM: Long-Short MemOry REcall and Spatio-TEmporal Consistency Model for Query-Driven 3D/4D Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31462-31471} }
BiEvLight: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Zishu and Su, Xiang-Xiang and Zhou, Shengning and Chen, Guang-Yong and Fan, Guodong and Chen, Xing}, title = {BiEvLight: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41541-41550} }
Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tzu Ling and Stavness, Ian and Rochan, Mrigank}, title = {Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31262-31271} }
RxnCaption: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Jiahe and Wang, Chuang and Jiang, Bowen and Wang, Yinfan and Zheng, Hao and Wei, Xingjian and Liu, Chengjin and Nie, Rui and Gao, Junyuan and Sun, Jiaxing and Wang, Yubin and Wu, Lijun and Huang, Zhenhua and Wu, Jiang and Yu, Qian and He, Conghui}, title = {RxnCaption: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30864-30873} }
Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dao_2026_CVPR, author = {Dao, Quan and Metaxas, Dimitris}, title = {Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33000-33011} }
VidEoMT: Your ViT is Secretly Also a Video Segmentation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Norouzi_2026_CVPR, author = {Norouzi, Narges and Zulfikar, Idil Esen and Cavagnero, Niccol{\`o} and Kerssies, Tommie and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan}, title = {VidEoMT: Your ViT is Secretly Also a Video Segmentation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35177-35186} }
HQC-NBV: A Hybrid Quantum-Classical View Planning Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xiaotong and Chen, Chang Wen}, title = {HQC-NBV: A Hybrid Quantum-Classical View Planning Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35091-35100} }
Multi-view Consistent 3D Gaussian Head Avatars 'without' Multi-view Generation-
[pdf]
[bibtex]@InProceedings{Chharia_2026_CVPR, author = {Chharia, Aviral and De la Torre, Fernando}, title = {Multi-view Consistent 3D Gaussian Head Avatars 'without' Multi-view Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40163-40174} }
D2T2 - Multimodal Automated Planning for Brachytherapy-
[pdf]
[supp]
[bibtex]@InProceedings{Moore_2026_CVPR, author = {Moore, Lance C. and Mitra, Aranyo and Truong, Ryan and Kallis, Karoline and Kisling, Kelly and Meyers, Sandra M. and Vasconcelos, Nuno}, title = {D2T2 - Multimodal Automated Planning for Brachytherapy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42670-42680} }
Toward Early Quality Assessment of Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Huanlei and Wei, Hongxin and Jing, Bingyi}, title = {Toward Early Quality Assessment of Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38410-38419} }
Semantic Foam: Unifying Spatial and Semantic Scene Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharafeldin_2026_CVPR, author = {Sharafeldin, Amr and Mikaeili, Aryan and Walker, Thomas and Govindarajan, Shrisudhan and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea}, title = {Semantic Foam: Unifying Spatial and Semantic Scene Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29814-29823} }
Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Li, Xueqi and Chen, Yiyang and Song, Jin and Wang, Yihan and Xiao, Zipeng and Su, Jiadi and Qiaoben, You and Liu, Pengfei and Deng, Zhijie}, title = {Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42505-42515} }
DetectSCI: Toward Object-Guided ROI Reconstruction for High-Resolution Video Snapshot Compressive Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xingjian and Wang, Lishun and Wang, Ping and Yuan, Xin}, title = {DetectSCI: Toward Object-Guided ROI Reconstruction for High-Resolution Video Snapshot Compressive Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41720-41729} }
Explaining CLIP Zero-shot Predictions Through Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ozdemir_2026_CVPR, author = {Ozdemir, Onat and Christensen, Anders and Alaniz, Stephan and Akata, Zeynep and Akbas, Emre}, title = {Explaining CLIP Zero-shot Predictions Through Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31336-31345} }
SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Papais_2026_CVPR, author = {Papais, Sandro and Feng, Lezhou and Cossette, Charles and Ge, Lingting}, title = {SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40751-40761} }
Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Minhyeok}, title = {Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34566-34574} }
SIR: Structured Image Representations for Explainable Robot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mattes_2026_CVPR, author = {Mattes, Paul and Schwab, Jan and Bosch, Jens and Li, Maximilian Xiling and Blank, Nils and Tang, Minh-Trung and Haberland, Moritz and Lioutikov, Rudolf}, title = {SIR: Structured Image Representations for Explainable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42484-42493} }
Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xinyu and Jia, Heng and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yang, Yi and Zhu, Linchao}, title = {Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36916-36925} }
Towards Calibrating Prompt Tuning of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharifdeen_2026_CVPR, author = {Sharifdeen, Ashshak and Shamshad, Fahad and Munir, Muhammad Akhtar and Basu, Abhishek and Ismithdeen, Mohamed and Jeyamohan, Jeyapriyan and Silva, Chathurika and Nandakumar, Karthik and Khan, Muhammad Haris}, title = {Towards Calibrating Prompt Tuning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39131-39140} }
Rethinking Glyph Spatial Information in Font Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Peng and Yang, Xi}, title = {Rethinking Glyph Spatial Information in Font Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29190-29199} }
Revisiting Multimodal KV Cache Compression: A Frequency-Domain-Guided Outlier-KV-Aware Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yaoxin and Ye, Peng and Tan, Xudong and Tu, Chongjun and Zhao, Maosen and Hao, Jia and Chen, Tao}, title = {Revisiting Multimodal KV Cache Compression: A Frequency-Domain-Guided Outlier-KV-Aware Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39550-39560} }
Dynamic Important Example Mining for Reinforcement Finetuning-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Haoru and Wu, Sitong and Chen, Yanfeng and Zhao, Shizhen and Sun, Yang-Tian and Liu, Tianjia and Chang, Chirui and Zhang, Shaofeng and Sun, Samm and Wu, Xiuzhe and Xie, Ruobing and Qi, Xiaojuan}, title = {Dynamic Important Example Mining for Reinforcement Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41456-41466} }
SPOT: Spatiotemporal Prompt Optimization for Motion-Stabilized MLLM-Guided Video Segmentation-
[pdf]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Jiayi and Qin, Zheyun and Xi, Xiaoming and Nie, Xiushan and Yin, Yilong}, title = {SPOT: Spatiotemporal Prompt Optimization for Motion-Stabilized MLLM-Guided Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32236-32245} }
Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Xu, Yufei and Zeng, Zhixiong and Yang, Siqi and Shi, Peng and Ma, Lin and Zhang, Jing}, title = {Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41111-41120} }
Live Interactive Training for Video Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xinyu and Yu, Haozheng and Sun, Yihong and Hariharan, Bharath and Sun, Jennifer J.}, title = {Live Interactive Training for Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39827-39837} }
Learning Long-term Motion Embeddings for Efficient Kinematics Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stracke_2026_CVPR, author = {Stracke, Nick and Bauer, Kolja and Baumann, Stefan Andreas and Bautista, Miguel {\'A}ngel and Susskind, Josh and Ommer, Bj{\"o}rn}, title = {Learning Long-term Motion Embeddings for Efficient Kinematics Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42581-42591} }
MedFG-VQA: Low-Frequency Memory and Graph Attention for Lightweight Medical VQA-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Haowen and Pei, Gensheng and Sun, Zeren and Ren, Mingwu and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin}, title = {MedFG-VQA: Low-Frequency Memory and Graph Attention for Lightweight Medical VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42755-42764} }
FastRef: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yufei and Tian, Long and Dai, Yuyang and Chen, Wenchao and Bao, Liang and Liu, Xiyang}, title = {FastRef: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43040-43049} }
SaPaVe: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Mengzhen and Zhou, Enshen and Chi, Cheng and Han, Yi and Rong, Shanyu and Chen, Liming and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang}, title = {SaPaVe: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37164-37174} }
Understanding Task Transfer in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sachdeva_2026_CVPR, author = {Sachdeva, Bhuvan and Uppal, Karan and Java, Abhinav and Balasubramanian, Vineeth N.}, title = {Understanding Task Transfer in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28754-28763} }
The Universal Normal Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tasker_2026_CVPR, author = {Tasker, Chen and Betser, Roy and Gofer, Eyal and Levi, Meir Yossef and Gilboa, Guy}, title = {The Universal Normal Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32015-32025} }
PFGNet: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Xinyong and Sun, Changbin and Wang, Yong and Yang, Hongyu and Wu, Yuankai}, title = {PFGNet: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38848-38858} }
RoSAMDepth: Robust Self-supervised Depth Estimation Leveraging Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Xuanang and Ning, Zhiwei and Zhang, Gengming and Cao, Jiaxi and Yang, Runze and Zheng, Zhonglong and Yang, Jie and Xiao, Rong and Liu, Wei}, title = {RoSAMDepth: Robust Self-supervised Depth Estimation Leveraging Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34138-34147} }
IVAAN: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Jaehoon and Hu, Yi and Kim, Soopil and Jang, Jongseong and Lee, Soonyoung and Park, Sang Hyun}, title = {IVAAN: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29305-29314} }
FedAlign: Differentially Private Distribution Alignment for Non-IID Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Peng and Zhang, Jiapeng and Song, Yingjie and Xiao, Xiong and Tang, Zhuo}, title = {FedAlign: Differentially Private Distribution Alignment for Non-IID Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31769-31778} }
NeoVerse: Enhancing 4D World Model with in-the-wild Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuxue and Fan, Lue and Shi, Ziqi and Peng, Junran and Wang, Feng and Zhang, Zhaoxiang}, title = {NeoVerse: Enhancing 4D World Model with in-the-wild Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40340-40351} }
Revisiting Model Stitching In the Foundation Model Era-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Zheda and Zhang, Ke and Wang, Fu-En and Wang, Zixiao Ken and Chen, Albert Y. C. and Xia, Lu and Sun, Min and Chao, Wei-Lun and Kuo, Cheng-Hao}, title = {Revisiting Model Stitching In the Foundation Model Era}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41342-41351} }
Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Sattler_2026_CVPR, author = {Sattler, Torsten and Kukelova, Zuzana}, title = {Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29047-29059} }
Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Guanting and Hu, Shenglong and Zhang, Kaihua and Liu, Guangcan and Xia, Min}, title = {Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32173-32183} }
SHAPE: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Linkuan and Xia, Yinghao and Shen, Yufei and Li, Xiangyu and Du, Wenjie and Cong, Cong and Wei, Leyi and Su, Ran and Jin, Qiangguo}, title = {SHAPE: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30000-30010} }
Region-Aware Instance Consistency Learning for Micro-Expression Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yaomin and Chen, C. L. Philip and Xu, Shiting and Liu, Haiqi and Zhang, Tong}, title = {Region-Aware Instance Consistency Learning for Micro-Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34428-34438} }
Sampling-Aware Quantization for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Qian and Song, Jie and Wan, Yuanyu and Wang, Huiqiong and Song, Mingli}, title = {Sampling-Aware Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35831-35840} }
AdvFM: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Runze and Wang, Zeyue and Sun, Fanghui and Liu, Rui and Yan, Yihan and Wang, Shen and Zhang, Zhaoyang}, title = {AdvFM: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42290-42299} }
MLLMSplat: A 2D MLLM-Powered Framework for 3D Gaussian Splatting Understanding, Generation, and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Xiu_2026_CVPR, author = {Xiu, Jingqiao and Wang, Can and Xu, Dong}, title = {MLLMSplat: A 2D MLLM-Powered Framework for 3D Gaussian Splatting Understanding, Generation, and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33301-33311} }
Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Yuqiao and Wang, Xu and Liang, Tengfei and Hao, Yiqing and Jin, Yi and Yu, Hui}, title = {Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29391-29399} }
D-Convexity: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shengzhe and Yan, Hao}, title = {D-Convexity: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34755-34764} }
Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Huatian and Mao, Zhendong and Zhang, Lei and Zhang, Yongdong}, title = {Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37831-37841} }
AT-VLA: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaoqi and Cai, Muhe and Xu, Jiadong and Zhu, Juan and Fan, Hongwei and Shen, Yan and Ren, Guangrui and Dong, Hao}, title = {AT-VLA: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28764-28774} }
SAGE: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Jain_2026_CVPR, author = {Jain, Jitesh and Li, Jialuo and Ma, Zixian and Zhang, Jieyu and Kim, Chris Dongjoo and Lee, Sangho and Tripathi, Rohun and Gupta, Tanmay and Clark, Christopher and Shi, Humphrey}, title = {SAGE: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41478-41488} }
Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics-
[pdf]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Ao and Li, Xin and Yang, Fan and Li, Yuezun and Yuan, Zhaoquan and Zhao, Shan and Su, Bing and Wu, Xiao}, title = {Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28669-28678} }
Smoothing the Score Function to Enhance Generalization in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xinyu and Zhang, Jiawei and Wright, Stephen J.}, title = {Smoothing the Score Function to Enhance Generalization in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43208-43217} }
SegMo: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient VLM Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haojuan and Tang, Ruohan and Cheng, Dongzhou and Zhang, Zongpu and Li, Jian and Wang, Jiaqi}, title = {SegMo: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient VLM Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33598-33608} }
TempoMaster: Efficient Long Video Generation via Next-Frame-Rate Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yukuo and Liu, Cong and Wang, Junke and Liu, Junqi and Huang, Haibin and Wu, Zuxuan and Zhang, Chi and Li, Xuelong}, title = {TempoMaster: Efficient Long Video Generation via Next-Frame-Rate Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30414-30424} }
Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR, author = {Jang, Jinhyeok and Kim, Jaehong and Kim, Jung Uk}, title = {Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39890-39900} }
EvoID: Reinforced Evolution for Identity-Preserving Video Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yiheng and Qiu, Zhaofan and Liu, Zunxu and Pan, Yingwei and Yao, Ting and Mei, Tao}, title = {EvoID: Reinforced Evolution for Identity-Preserving Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41870-41880} }
Flow4DGS-SLAM: Optical Flow-Guided 4D Gaussian Splatting SLAM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yunsong and Lee, Gim Hee}, title = {Flow4DGS-SLAM: Optical Flow-Guided 4D Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33364-33373} }
Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiayuan and Liu, Ruoqi and Gu, Zishan and Zhang, Ping}, title = {Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41826-41835} }
HalluGen: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seunghoi and Tregidgo, Henry F. J. and Jin, Chen and Figini, Matteo and Alexander, Daniel C.}, title = {HalluGen: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32550-32560} }
NeuroSeg Meets DINOv3: Transferring 2D Self-Supervised Visual Priors to 3D Neuron Segmentation via DINOv3 Initialization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yik San and Zhao, Runkai and Cai, Weidong}, title = {NeuroSeg Meets DINOv3: Transferring 2D Self-Supervised Visual Priors to 3D Neuron Segmentation via DINOv3 Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30053-30064} }
TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Zhengpeng and Atzberger, Clement and Jaffer, Sadiq and Knezevic, Jovana and Sormunen, Silja and Young, Robin and Lisaius, Madeline C. and Immitzer, Markus and Jackson, Toby and Ball, James and Coomes, David A. and Madhavapeddy, Anil and Blake, Andrew and Keshav, Srinivasan}, title = {TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34818-34831} }
Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yongrui and Zhao, Shijie and Yao, Mingde and Li, Junlin and Zhang, Li and Liu, Xiaohong and Dou, Qi and Gu, Jinwei and Xue, Tianfan}, title = {Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35779-35788} }
MOS: Mitigating Optical-SAR Modality Gap for Cross-Modal Ship Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yujian and Liu, Hankun and Niu, Guanglin}, title = {MOS: Mitigating Optical-SAR Modality Gap for Cross-Modal Ship Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30335-30345} }
Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Shuhan and Chitta, Kashyap and Chen, Yuxiao and Tian, Ran and You, Yurong and Wang, Yan and Luo, Wenjie and Cao, Yulong and Kr{\"a}henb{\"u}hl, Philipp and Pavone, Marco and Ivanovic, Boris}, title = {Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39724-39733} }
BDNet: Bio-Inspired Dual-Backbone Small Object Detection Network-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Wenchao and Lin, Chuan and Huang, Sihan and Wang, Xiongzhen and Pang, Xintao}, title = {BDNet: Bio-Inspired Dual-Backbone Small Object Detection Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32724-32734} }
RoboTAG: End-to-end Robot Pose Estimation via Topological Alignment Graph-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yifan and Zhan, Fangneng and Li, Wanhua and Sun, Haowen and Fragkiadaki, Katerina and Pfister, Hanspeter}, title = {RoboTAG: End-to-end Robot Pose Estimation via Topological Alignment Graph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35146-35155} }
SegQuant: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiaji and Sun, Ruichao and Zhao, Hailiang and Wu, Jiaju and Chen, Peng and Li, Hao and Liu, Yuying and Chow, Kingsum and Xiong, Gang and Deng, Shuiguang}, title = {SegQuant: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43717-43726} }
F^2HDR: Two-Stage HDR Video Reconstruction via Flow Adapter and Physical Motion Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Huanjing and Li, Dawei and Tu, Shaoxiong and Yang, Jingyu}, title = {F{\textasciicircum}2HDR: Two-Stage HDR Video Reconstruction via Flow Adapter and Physical Motion Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33985-33994} }
Hi-Lo Prune: Look at What You'll Lose before Pruning with Hierarchical Token Selection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zixun and Dong, Yubo and Fan, Hehe and Yang, Yi}, title = {Hi-Lo Prune: Look at What You'll Lose before Pruning with Hierarchical Token Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31941-31951} }
VideoWorld 2: Learning Transferable Knowledge from Real-world Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Zhongwei and Wei, Yunchao and Yu, Xiao and Luo, Guixun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie}, title = {VideoWorld 2: Learning Transferable Knowledge from Real-world Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40569-40580} }
RDFace: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Ganlin and Long, Yuxi and Ali, Hafsa and Lou, Erin and Butt, Fahad and Liu, Qian and Wang, Yang and Hu, Pingzhao}, title = {RDFace: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42976-42986} }
TherA: Thermal-Aware Visual-Language Prompting for Controllable RGB-to-Thermal Infrared Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Dong-Guw and Rhee, Tai Hyoung and Jang, Hyunsoo and Shin, Young-Sik and Shin, Ukcheol and Kim, Ayoung}, title = {TherA: Thermal-Aware Visual-Language Prompting for Controllable RGB-to-Thermal Infrared Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36803-36813} }
Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yang and Bao, Yiwei and Lu, Feng}, title = {Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42858-42868} }
PosterIQ: A Design Perspective Benchmark for Poster Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yuheng and Zhang, Wen and Duan, Haodong and Zou, Xingxing}, title = {PosterIQ: A Design Perspective Benchmark for Poster Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29295-29304} }
Rethinking 2D-3D Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zhixin and Liao, Bohao and Deng, Jiacheng and Yin, Xiaotian and Li, Xinjun and Chen, Yujia and Yin, Baoqun and Zhang, Tianzhu}, title = {Rethinking 2D-3D Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39052-39063} }
Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuedong and Wei, Xiwen and Munir, Mustafa and Marculescu, Radu}, title = {Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33436-33445} }
FB-CLIP: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Ming and Huo, Yongsheng and Dou, Mingyu and Yin, Jianfu and Zhao, Peng and Wang, Yao and Hu, Cong and Hu, Bingliang and Wang, Quan}, title = {FB-CLIP: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35659-35669} }
DINO Eats CLIP: Adapting Beyond Knowns for Open-set 3D Object Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xinwei and Zheng, Yansong and Han, Qianru and Wang, Zhichuan and Cai, Yuxuan and Zhou, Yang and Xia, Jingbo and Wang, Yulong and Xiang, Jinhai and Bai, Xiang}, title = {DINO Eats CLIP: Adapting Beyond Knowns for Open-set 3D Object Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34704-34713} }
MedLIME: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations-
[pdf]
[supp]
[bibtex]@InProceedings{Magazine_2026_CVPR, author = {Magazine, Raghav and Li, Xingjian and Xu, Min}, title = {MedLIME: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38926-38935} }
Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future sMRI Representations for Early MCI Conversion Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Zhihao and Li, Chaozhuo and Zhang, Litian and Zhang, Xi}, title = {Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future sMRI Representations for Early MCI Conversion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42691-42700} }
MoLingo: Motion-Language Alignment for Text-to-Human Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yannan and Tiwari, Garvita and Zhang, Xiaohan and Bora, Pankaj and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard}, title = {MoLingo: Motion-Language Alignment for Text-to-Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38387-38398} }
VL-Eraser: Vacuum Distillation for Machine Unlearning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yili and Dai, Lu and Huang, Tairan and Xu, Yijie and Xiong, Hui}, title = {VL-Eraser: Vacuum Distillation for Machine Unlearning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31504-31513} }
CoordSpeaker: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Fengyi and Yang, Sicheng and Yang, Wenming}, title = {CoordSpeaker: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30761-30771} }
Neu-PiG: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences-
[pdf]
[supp]
[bibtex]@InProceedings{Kaltheuner_2026_CVPR, author = {Kaltheuner, Julian and Dr{\"o}ge, Hannah and Plack, Markus and Stotko, Patrick and Klein, Reinhard}, title = {Neu-PiG: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36539-36549} }
VecAttention: Vector-wise Sparse Attention for Accelerating Long Context Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Anmin and Yang, Ruixuan and Jiang, Huiqiang and Lin, Bin and Sun, Minmin and Li, Yong and Zhang, Chen and Xie, Tao}, title = {VecAttention: Vector-wise Sparse Attention for Accelerating Long Context Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41299-41310} }
Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Fu, Ying}, title = {Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34050-34060} }
GDRO: Group-level Reward Post-training Suitable for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Liu, Yu and Zhao, Hengshuang}, title = {GDRO: Group-level Reward Post-training Suitable for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43505-43513} }
mmWaveFlow: Unified Enhancement and Generation of mmWave Human Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Chang and Jin, Beihong and Shi, Qiwen and Wang, Zhi}, title = {mmWaveFlow: Unified Enhancement and Generation of mmWave Human Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31366-31376} }
Occlusion-Aware SORT: Observing Occlusion for Robust Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chunjiang and Ma, Jianbo and Shen, Li and Chen, Yanru and Chen, Liangyin}, title = {Occlusion-Aware SORT: Observing Occlusion for Robust Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42560-42570} }
TIACam: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tanvir_2026_CVPR, author = {Tanvir, Abdullah and Dasgupta, Agnibh and Zhong, Xin}, title = {TIACam: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43030-43039} }
AnthroTAP: Learning Point Tracking with Real-World Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, In{\`e}s Hyeonsu and Cho, Seokju and Koo, Jahyeok and Park, Junghyun and Huang, Jiahui and Lee, Honglak and Lee, Joon-Young and Kim, Seungryong}, title = {AnthroTAP: Learning Point Tracking with Real-World Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42516-42526} }
CodePercept: Code-Grounded Visual STEM Perception for MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Tongkun and Yang, Zhibo and Wan, Jianqiang and Yang, Mingkun and Guo, Zhentao and Hu, Zijian and Luo, Ruilin and Chen, Ruizhe and Jiang, Songtao and Wang, Peng and Shen, Wei and Lin, Junyang and Yang, Xiaokang}, title = {CodePercept: Code-Grounded Visual STEM Perception for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33542-33552} }
Bridging the Perception Gap in Image Super-Resolution Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Shaolin and Rocafort, Josep M. and Xue, Danna and Serrano-Lozano, David and Sun, Lei and Vazquez-Corral, Javier}, title = {Bridging the Perception Gap in Image Super-Resolution Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30532-30542} }
Collaborative Multi-Mode Pruning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zimeng and Wang, Yunhong and Wang, Donghao and Chen, Jiaxin}, title = {Collaborative Multi-Mode Pruning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39561-39571} }
MIBURI: Towards Expressive Interactive Gesture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mughal_2026_CVPR, author = {Mughal, M. Hamza and Dabral, Rishabh and Demberg, Vera and Theobalt, Christian}, title = {MIBURI: Towards Expressive Interactive Gesture Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40031-40041} }
EEGiT: Teaching Vision Transformers to Understand the EEG signal-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Jiahao and Xu, Chenghao and Wang, Wei and Yang, Erkun and Deng, Cheng}, title = {EEGiT: Teaching Vision Transformers to Understand the EEG signal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40438-40447} }
Learning Surgical Robotic Manipulation with 3D Spatial Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Yu and Wang, Lidian and Chu, Xiaomeng and Deng, Jiajun and Cheng, Min and Zhang, Yanyong and Hua, Bei and Li, Houqiang and Ji, Jianmin}, title = {Learning Surgical Robotic Manipulation with 3D Spatial Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42441-42451} }
Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of LVLM Hallucinations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and Xie, Yutong and Nguyen, Cam-Tu and Nguyen, Minh Khoi and Nguyen, Dang Huy Pham and van den Hengel, Anton and Verjans, Johan and Le Nguyen, Phi and Phan, Vu Minh Hieu}, title = {Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of LVLM Hallucinations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40235-40244} }
Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yunhong and Zeng, Yanhong and Li, Haobo and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Zhu, Jiapeng and Cao, Hengyuan and Zhang, Zhipeng and Zhu, Xing and Shen, Yujun and Zhang, Min}, title = {Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34385-34397} }
Graph-to-Frame RAG: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Yu, Weijiang and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong}, title = {Graph-to-Frame RAG: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33805-33815} }
VCP-Attack: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jiawei and Du, Minjie and Qin, Zihan and Wang, Zhuoran and Xie, Lizhe and Hu, Yining}, title = {VCP-Attack: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30110-30119} }
Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Xiaojun and Liao, Tianchi and Liu, Zhiyuan and Chen, Chuan and Zheng, Zibin}, title = {Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31779-31789} }
MemFlow: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Jianming and Wang, Chengjun and Liang, Depin and Ma, Qianli and Chen, Wei and Cheng, Xueqi}, title = {MemFlow: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36968-36977} }
See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse Earth Observation Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiayi and Tan, Zhihong and Wei, Hongchen and Yang, Daiqin and Chen, Zhenzhong}, title = {See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse Earth Observation Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42191-42201} }
WildPose: A Unified Framework for Robust Pose Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jianhao and Zhu, Liyuan and Zhu, Zihan and Armeni, Iro}, title = {WildPose: A Unified Framework for Robust Pose Estimation in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28903-28913} }
Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Ziyue and Hou, Jiahe and Xia, Hongyu and Xie, Xinrui and Wang, Feifei and Zhou, Yuyin and Wang, Wei and Liu, Jiawei and Qu, Liangqiong}, title = {Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35967-35977} }
2ndMatch: Finetuning Pruned Diffusion Models via Second-Order Jacobian Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Caleb and Shlizerman, Eli}, title = {2ndMatch: Finetuning Pruned Diffusion Models via Second-Order Jacobian Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43385-43395} }
CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiange and Shi, Yansong and Zhu, Haoyi and Liu, Mingyu and Ma, Kaijing and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin}, title = {CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42352-42363} }
Latent Diffusion Inversion Requires Understanding the Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2026_CVPR, author = {Rao, Mingxing and Qu, Bowen and Moyer, Daniel}, title = {Latent Diffusion Inversion Requires Understanding the Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34971-34980} }
Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junhan and Zhou, Zilu and Tong, Yujun and Chang, Dongliang and Luo, Yitao and Ma, Zhanyu}, title = {Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41446-41455} }
DSERT-RoLL: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-RGB-Thermal Cameras, 4D Radar, and Dual-LiDAR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Hoonhee and Kang, Jae-Young and Jeong, Yuhwan and Yang, Yunseo and Lee, Wonyoung and Kim, Youngho and Yoon, Kuk-Jin}, title = {DSERT-RoLL: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-RGB-Thermal Cameras, 4D Radar, and Dual-LiDAR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33022-33035} }
Seeing Motion Through Polarity for Event-based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Meiqi and Zhang, Jiachao and Jiang, Xin and Yan, Rui and Yao, Yazhou and Li, Zechao and Shu, Xiangbo}, title = {Seeing Motion Through Polarity for Event-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37075-37085} }
Ultra-Fast Neural Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Xie, Wenxuan and Jia, Zhaoyang and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Lu, Yan}, title = {Ultra-Fast Neural Video Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41311-41321} }
CRAFT: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zening and Xie, Zhengpeng and Bai, Lichen and Shao, Shitong and Yang, Shuo and Xie, Zeke}, title = {CRAFT: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35841-35850} }
BEA-GS: BEyond RAdiance Supervision in 3DGS for Precise Object Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Mazzucchelli_2026_CVPR, author = {Mazzucchelli, Alessio and Naranjo-Almeida, Maria and Bustos-Sanchez, Jorge and Dimiccoli, Mariella and Moreno-Noguer, Francesc and Sanchez-Riera, Jordi and Penate-Sanchez, Adrian}, title = {BEA-GS: BEyond RAdiance Supervision in 3DGS for Precise Object Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41054-41064} }
FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jie and Guo, Xiao and Su, Yiyang and Jain, Anil and Liu, Xiaoming}, title = {FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32756-32766} }
Learning Forgery-Aware Lip Representations Without Forgery Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Bofan and Zhu, Hongyu and He, Yi and Liang, Sichu and Wang, Shi-Lin}, title = {Learning Forgery-Aware Lip Representations Without Forgery Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42911-42921} }
How to Take a Memorable Picture? Empowering Users with Actionable Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Laiti_2026_CVPR, author = {Laiti, Francesco and Talon, Davide and Staiano, Jacopo and Ricci, Elisa}, title = {How to Take a Memorable Picture? Empowering Users with Actionable Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29738-29749} }
ZINA: Multimodal Fine-grained Hallucination Detection and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wada_2026_CVPR, author = {Wada, Yuiga and Matsuda, Kazuki and Sugiura, Komei and Neubig, Graham}, title = {ZINA: Multimodal Fine-grained Hallucination Detection and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32528-32538} }
Exploring Visual Pretraining for Learning Language Intelligence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zhonghan and Zhang, Yiming and Zhang, Wenwei and Zhao, Haiteng and Wei, Xingguang and Gao, Zhangwei and Liu, Kuikun and Gu, Yuzhe and Wu, Size and Huang, Haian and Gao, Jianfei and Lv, Haijun and Song, Demin and Zhou, Yunhua and Guo, Qipeng and Wang, Gaoang and Chen, Kai}, title = {Exploring Visual Pretraining for Learning Language Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31493-31503} }
ReBaPL: Repulsive Bayesian Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bendou_2026_CVPR, author = {Bendou, Yassir and Ezzahir, Omar and Montesuma, Eduardo and Mahuas, Gabriel and Shevchenko, Victoria and Gartrell, Mike}, title = {ReBaPL: Repulsive Bayesian Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39962-39971} }
Model Merging in the Essential Subspace-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Longhua and Qi, Lei and Tian, Qi and Geng, Xin}, title = {Model Merging in the Essential Subspace}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31145-31154} }
RecTok: Reconstruction Distillation along Rectified Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Qingyu and Wu, Size and Bai, Jinbin and Yu, Kaidong and Wang, Yujing and Tong, Yunhai and Li, Xiangtai and Li, Xuelong}, title = {RecTok: Reconstruction Distillation along Rectified Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40685-40695} }
Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Shigeng and Xu, Hongming and Jiang, Guiyang and Rossi, Tuomo and K{\"a}rkk{\"a}inen, Tommi and Cong, Fengyu}, title = {Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35311-35320} }
Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Muquan and Gou, Hang and Ma, Yingyi and Wang, Rongzheng and Qin, Ke and He, Tao}, title = {Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33963-33972} }
CASPA: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chatterjee_2026_CVPR, author = {Chatterjee, Abhiroop and Ghosh, Susmita and Ghosh, Ashish and Ientilucci, Emmett}, title = {CASPA: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31566-31576} }
4D-RGPT: Toward Region-level 4D Understanding via Perceptual Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Chiao-An and Hachiuma, Ryo and Liu, Sifei and Radhakrishnan, Subhashree and Yeh, Raymond A. and Wang, Yu-Chiang Frank and Chen, Min-Hung}, title = {4D-RGPT: Toward Region-level 4D Understanding via Perceptual Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31042-31053} }
EasyOmnimatte: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yihan and Chen, Xuelin and Cun, Xiaodong}, title = {EasyOmnimatte: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43341-43351} }
Interactive Episodic Memory with User Feedback-
[pdf]
[supp]
[bibtex]@InProceedings{Subedi_2026_CVPR, author = {Subedi, Nikesh and Bazzani, Loris and Al-Halah, Ziad}, title = {Interactive Episodic Memory with User Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38826-38835} }
PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zhi-Yi and Markhorst, Thomas and Chew, Jouh Yeong and Zhang, Xucong}, title = {PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29379-29390} }
Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Yang and Liu, Weiming and Dan, Jun and Xu, Tengyue and Wang, Fan and Yu, Hua and Dong, Junhao and Liu, Jiao and Dong, Shunjie and Qi, Lianyong}, title = {Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32026-32036} }
TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Chunxia and Liu, Chenyu and Xia, Pengcheng and Du, Jun and Yin, Baocai and Yin, Bing and Liu, Cong}, title = {TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36838-36849} }
TopoCL: Topological Contrastive Learning for Medical Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Guangyu and Gu, Pengfei and Liang, Peixian and Lalor, John P. and Chambers, Erin Wolf and Chen, Danny Z.}, title = {TopoCL: Topological Contrastive Learning for Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42681-42690} }
MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Chengyue and Zhang, Mellon M. and Azarcon, Robert and Chou, Glen and Kira, Zsolt}, title = {MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32451-32462} }
TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junyuan and Wang, Bin and Zhang, Qintong and Wu, Fan and Wen, Zichen and Lu, Jialin and Shan, Junjie and Zhao, Ziqi and Yang, Shuya and Wang, Ziling and Miao, Ziyang and Zhong, Huaping and Zang, Yuhang and Dong, Xiaoyi and Chow, Ka-Ho and He, Conghui}, title = {TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33196-33206} }
AudioStory: Generating Long-Form Narrative Audio with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yuxin and Wang, Teng and Ge, Yuying and Ma, Shijie and Ge, Yixiao and Zou, Wei}, title = {AudioStory: Generating Long-Form Narrative Audio with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37735-37744} }
DialogueVPR: Towards Conversational Visual Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yukun and Wang, Changwei and Pei, Xingtian and Xu, Shibiao and Xu, Wenhao and Chen, Shunpeng and Zhang, Yu and Zhang, Ke and Xu, Rongtao and Feng, Xuxiang and Wang, Pengyang}, title = {DialogueVPR: Towards Conversational Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41100-41110} }
RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shijie and Zhu, Bin and Yang, Jiarui and Zhao, Xiangyu and Chen, Jingjing and Jiang, Yu-Gang}, title = {RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43050-43060} }
ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Jie and Wu, Jiahao and Wang, Chao and Yang, Jiayu and Zheng, Xiaoyun and Xiong, Kaiqiang and Wang, Zhanke and Yan, Jinbo and Gao, Feng and Wang, Ronggang}, title = {ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41022-41032} }
Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuo and Li, Chenqi and Zhu, Tingting}, title = {Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36947-36956} }
Multi-view Pyramid Transformer: Look Coarser to See Broader-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Yang, Seungkwon and Nam, Seungtae and Lee, Younggeun and Kim, Jungwoo and Park, Eunbyung}, title = {Multi-view Pyramid Transformer: Look Coarser to See Broader}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37380-37390} }
EasyV2V: A High-quality Instruction-based Video Editing Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Jinjie and Wang, Chaoyang and Qian, Gordon Guocheng and Menapace, Willi and Tulyakov, Sergey and Ghanem, Bernard and Wonka, Peter and Mirzaei, Ashkan}, title = {EasyV2V: A High-quality Instruction-based Video Editing Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30435-30445} }
Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiawei and Gong, Xun and Fang, Fen and Yang, Muli and Qu, Bohao and Hu, Yunfeng and Chen, Hong and Yang, Xulei and Guo, Qing}, title = {Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32070-32081} }
OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yuxuan and Hao, Jing and Chen, Hong and Bao, Jiahao and Shao, Yihua and Liang, Yuci and Hung, Kuo Feng and Tang, Hao}, title = {OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35373-35383} }
LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Hwang_2026_CVPR, author = {Hwang, Hyunha and Nguyen, Xuan Truong and Lee, Hyuk-Jae}, title = {LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33588-33597} }
CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Messina_2026_CVPR, author = {Messina, Pablo and Villa, Andr{\'e}s and Alcazar, Juan Leon and Sanchez, Karen and Hinojosa, Carlos and Parra, Denis and Soto, Alvaro and Ghanem, Bernard}, title = {CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36279-36289} }
Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, He and Togo, Ren and Ogawa, Takahiro and Hirata, Kenji and Tang, Minghui and Yoshimura, Takaaki and Sugimori, Hiroyuki and Nishioka, Noriko and Shimizu, Yukie and Kudo, Kohsuke and Haseyama, Miki}, title = {Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42701-42710} }
Free-Grained Hierarchical Visual Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Seulki and Wang, Zilin and Yu, Stella X.}, title = {Free-Grained Hierarchical Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32767-32776} }
KV-Tracker: Real-Time Pose Tracking with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taher_2026_CVPR, author = {Taher, Marwan and Alzugaray, Ignacio and Mazur, Kirill and Kong, Xin and Davison, Andrew}, title = {KV-Tracker: Real-Time Pose Tracking with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28990-28999} }
Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE)-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kung_2026_CVPR, author = {Kung, Gustavo Chau Loo and Abbasi, Mohammad and Blank, Camila and Zhang, Juze and Wang, Alan Q. and Ostmeier, Sophie and Chaudhari, Akshay and Pohl, Kilian and Adeli, Ehsan}, title = {Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE)}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38430-38441} }
M4V: Multimodal Mamba for Efficient Text-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiancheng and Zhang, Gengwei and Jie, Zequn and Jiao, Siyu and Qian, Yinlong and Chen, Ling and Wei, Yunchao and Ma, Lin}, title = {M4V: Multimodal Mamba for Efficient Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36747-36757} }
TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chaohu and Wang, Shida and Wang, Yubo and Xu, Linli}, title = {TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33553-33565} }
IntrinsicWeather: Controllable Weather Editing in Intrinsic Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yixin and Zhu, Zuo-Liang and Yang, Jian and Ha{\v{s}}an, Milo{\v{s}} and Xie, Jin and Wang, Beibei}, title = {IntrinsicWeather: Controllable Weather Editing in Intrinsic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30772-30781} }
Information-Theoretic Decomposition for Multimodal Interaction Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zequn and Wei, Yake and Ni, Haotian and Xu, Zhihao and Hu, Di}, title = {Information-Theoretic Decomposition for Multimodal Interaction Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30278-30287} }
Learnability-Guided Diffusion for Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Chan-Santiago_2026_CVPR, author = {Chan-Santiago, Jeffrey A. and Shah, Mubarak}, title = {Learnability-Guided Diffusion for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41657-41666} }
Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qifan and Zhou, Xingyu and Zhang, Jinhua and You, Weiyi and Gu, Shuhang}, title = {Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43643-43652} }
Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yuanfan and Peng, Kunyu and Zheng, Xu and Yang, Kailun}, title = {Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42115-42125} }
PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yixiao and Li, Qingyong and Wang, Wen and Yan, Zhicheng}, title = {PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33343-33352} }
Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping-
[pdf]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Tianxiang and He, Hulingxiao and Peng, Yuxin}, title = {Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37766-37776} }
IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Yihang and Huang, Shuo and Zhang, Li and Wang, Meiling and Zhang, Li}, title = {IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35331-35340} }
Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhenyu and Zou, Yixiong and Li, Yuhua and Li, Ruixuan and Chen, Guangyao}, title = {Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36978-36988} }
Progressive Supernet Training for Efficient Visual Autoregressive Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaoyue and Shi, Yuling and Li, Kaiyuan and Wang, Huandong and Li, Yong and Gu, Xiaodong and Chen, Xinlei and Lin, Mingbao}, title = {Progressive Supernet Training for Efficient Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37950-37959} }
Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef}, title = {Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30065-30075} }
Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Chenfeng and Cheng, De and Luo, Wenlong and Zeng, Mingyue and Zhang, Shizhou and Wang, Nannan and Gao, Xinbo}, title = {Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39935-39944} }
Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Strong_2026_CVPR, author = {Strong, Matthew and Chang, Wei-Jer and Herau, Quentin and Yang, Jiezhi and Hu, Yihan and Peng, Chensheng and Zhan, Wei}, title = {Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32144-32153} }
TransPrune: Token Transition Pruning for Efficient Large Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ao and Duan, Yuxiang and Zhang, Jinghui and Ma, Congbo and Xie, Yutong and Carneiro, Gustavo and Yaqub, Mohammad and Wang, Hu}, title = {TransPrune: Token Transition Pruning for Efficient Large Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39529-39538} }
SEA-Flow3D: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Ling_2026_CVPR, author = {Ling, Han and Sun, Quansen and Yao, Yinghua and Tsang, Ivor and Sun, Yinghui}, title = {SEA-Flow3D: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36475-36484} }
SAMTok: Representing Any Mask with Two Words-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yikang and Zhang, Tao and Gong, Dengxian and Wu, Yuanzheng and Tian, Ye and Wang, Haochen and Yuan, Haobo and Wang, Jiacong and Qi, Lu and Fei, Hao and Ji, Shunping and Wang, Anran and Wang, Zhuochen and Wang, Yujing and Chen, Cheng and Li, Xiangtai}, title = {SAMTok: Representing Any Mask with Two Words}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37852-37863} }
Iris: Integrating Language into Diffusion-based Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Ziyao and Ni, Jingcheng and Wang, Daniel and Rim, Patrick and Chung, Younjoon and Yang, Fengyu and Hong, Byung-Woo and Wong, Alex}, title = {Iris: Integrating Language into Diffusion-based Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34193-34205} }
DecoVLN: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Zihao and Li, Wentong and Jiang, Yixuan and Wang, Bin and Cong, Runmin and Qin, Jie and Huang, Shengjun}, title = {DecoVLN: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32410-32420} }
SafeLogo: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Zhiyi and Zhang, Xiaoyue and Man, Tianxing}, title = {SafeLogo: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37611-37620} }
Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghafoorian_2026_CVPR, author = {Ghafoorian, Mohsen and Korzhenkov, Denis and Habibian, Amirhossein}, title = {Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32915-32925} }
VIRO: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Hyejin and Kwon, Junhyuk and Kwak, Suha and Ok, Jungseul}, title = {VIRO: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33426-33435} }
Better, Stronger, Faster: Tackling the Trilemma in MLLM-based Segmentation with Simultaneous Textual Mask Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiazhen and Feng, Mingkuan and Chen, Long}, title = {Better, Stronger, Faster: Tackling the Trilemma in MLLM-based Segmentation with Simultaneous Textual Mask Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33121-33130} }
Mirror Illusion Art-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xiaopei and Li, Zeyuan and Zhu, Jun and Hu, Xiaolin}, title = {Mirror Illusion Art}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31577-31585} }
DiffusionHarmonizer: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuxuan and T{\'o}thov{\'a}, Katar{\'\i}na and Wang, Zian and Yin, Kangxue and Turki, Haithem and de Lutio, Riccardo and Chang, Yen-Yu and Litany, Or and Fidler, Sanja and Gojcic, Zan}, title = {DiffusionHarmonizer: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43494-43504} }
Parallel Rigidity Matters for Bundle Adjustment-
[pdf]
[supp]
[bibtex]@InProceedings{Manam_2026_CVPR, author = {Manam, Lalit and Govindu, Venu Madhav}, title = {Parallel Rigidity Matters for Bundle Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29035-29046} }
BiPreManip: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Yan and Jiang, Feng and He, Zichen and Li, Xiaoqi and Liu, Yuchen and Li, Zhiyu and Wu, Ruihai and Dong, Hao}, title = {BiPreManip: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42430-42440} }
RegionRoute: Regional Style Transfer with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Bowen and Zuena, Jake and Bovik, Alan C. and Kothandaraman, Divya}, title = {RegionRoute: Regional Style Transfer with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35736-35746} }
Merge3D: Efficient 3D Multimodal LLMs via Joint 2D-3D Token Merging-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Tianbo and Yang, Xingyi and Wang, Xinchao}, title = {Merge3D: Efficient 3D Multimodal LLMs via Joint 2D-3D Token Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31066-31077} }
IMAIA: Interactive Maps AI Assistant for Travel Planning and Geo-Spatial Intelligence-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Jieren and Hu, Zhizhang and He, Ziyan and Cvetkovic, Aleksandar and Chung, Pak Kiu and Yankov, Dragomir and Zhang, Chiqun}, title = {IMAIA: Interactive Maps AI Assistant for Travel Planning and Geo-Spatial Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40064-40073} }
ActiveGrasp: Information-Guided Active Grasping with Calibrated Energy-based Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Boshu and Jiang, Wen and Daniilidis, Kostas}, title = {ActiveGrasp: Information-Guided Active Grasping with Calibrated Energy-based Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42418-42429} }
Bridging Privacy and Provenance: Traceable Virtual Identity Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Xianhan and Hu, Xiaoxiao and Li, Sheng and Qian, Zhenxing and Zhang, Xinpeng}, title = {Bridging Privacy and Provenance: Traceable Virtual Identity Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32367-32376} }
Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhan and Wang, Leiquan and Wu, Chunlei and Meng, Yu}, title = {Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29929-29938} }
LEMON: A Large Endoscopic MONocular Dataset and Foundation Model for Perception in Surgical Settings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR, author = {Che, Chengan and Wang, Chao and Vercauteren, Tom and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.}, title = {LEMON: A Large Endoscopic MONocular Dataset and Foundation Model for Perception in Surgical Settings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42659-42669} }
FairLLaVA: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhosale_2026_CVPR, author = {Bhosale, Mahesh and Wasi, Abdul and Srivastava, Shantam and Latif, Shifa and Luan, Tianyu and Gao, Mingchen and Doermann, David and Gong, Xuan}, title = {FairLLaVA: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42615-42625} }
Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiaxuan and Duan, Lei and Wang, Xinye and Du, Liang}, title = {Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39691-39700} }
Event-Based Motion Deblurring Using Task-Oriented 3D Gaussian Event Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Shengdong and Ma, Haoxiang and Chen, Hao and Yang, Zhen and Deng, Yongjian}, title = {Event-Based Motion Deblurring Using Task-Oriented 3D Gaussian Event Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29547-29556} }
IsoCLIP: Decomposing CLIP Projectors for Efficient Intra-modal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magistri_2026_CVPR, author = {Magistri, Simone and Goswami, Dipam and Mistretta, Marco and Twardowski, Bart{\l}omiej and van de Weijer, Joost and Bagdanov, Andrew D.}, title = {IsoCLIP: Decomposing CLIP Projectors for Efficient Intra-modal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29315-29324} }
RISE: Single Static Radar-based Indoor Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Kaichen and Dodds, Laura and Afzal, Sayed Saad and Adib, Fadel}, title = {RISE: Single Static Radar-based Indoor Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32194-32205} }
FlowDIS: Language-Guided Dichotomous Image Segmentation with Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargsyan_2026_CVPR, author = {Sargsyan, Andranik and Navasardyan, Shant}, title = {FlowDIS: Language-Guided Dichotomous Image Segmentation with Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42039-42048} }
OmniFood8K: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Dongjian and Min, Weiqing and Jiang, Qian and Lin, Xing and Jin, Xin and Jiang, Shuqiang}, title = {OmniFood8K: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41562-41572} }
SketchFaceGS: Real-Time Sketch-Driven Face Editing and Generation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bo and Kang, Jiahao and Ma, Yubo and Liu, Feng-Lin and Liu, Bin and Zhang, Fang-Lue and Gao, Lin}, title = {SketchFaceGS: Real-Time Sketch-Driven Face Editing and Generation with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40020-40030} }
DeepfakeImpact: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection-
[pdf]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Chaoyu and Zhang, Han and Luo, Siqiang}, title = {DeepfakeImpact: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35451-35461} }
Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jizhou and Ding, Chenhao and He, Yuhang and Wang, Qiang and Wang, Shaokun and Dong, SongLin and Gong, Yihong}, title = {Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28734-28743} }
Logit-Margin Repulsion for Backdoor Defense-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhiguo and Xu, Dongsheng and Zhong, Ruizhi and Pi, Jiacheng and Huang, Xingxing and Ruan, Wenjie}, title = {Logit-Margin Repulsion for Backdoor Defense}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34918-34928} }
Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhuohan and Peng, Wujian and Chen, Yitong and Wu, Zuxuan}, title = {Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36604-36614} }
Agentic Video Summarization via Self-Reflecting Multimodal Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Miaotian and Dou, Shuguang and Li, Yin and Men, Aidong and Jiang, Dongsheng}, title = {Agentic Video Summarization via Self-Reflecting Multimodal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40516-40526} }
Easy2Hard: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhicheng and Liu, Yichen and Ge, Chang and Jiang, Xiaopeng}, title = {Easy2Hard: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30226-30234} }
Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Junda and Liu, Yanmeng and Zeng, Xiangqiang and Wu, Jinrong and Qu, Ying and Zhang, Libao}, title = {Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34854-34864} }
InterPhys: Physics-aware Human Motion Synthesis in a Dynamic Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Chaoyue and Mao, Wei and Liu, Miaomiao}, title = {InterPhys: Physics-aware Human Motion Synthesis in a Dynamic Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30729-30739} }
Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuhua and Zhang, Qinnan and Li, Xiaodong and Zhang, Huan and Sun, Yifan and Qiu, Wangjie and Zhang, Hainan and Tong, Yongxin and Zheng, Zhiming}, title = {Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39414-39424} }
Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Yihang and Hu, Senkang and An, Haonan and Fang, Zhengru and Cao, Hangcheng and Fang, Yuguang}, title = {Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42321-42330} }
ActAvatar: Temporally-Aware Precise Action Control for Talking Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Ziqiao and Chen, Yi and Ma, Yifeng and Zhang, Guozhen and Sun, Zhiyao and Zhou, Zixiang and Zhang, Youliang and Zhou, Zhengguang and Fan, Zhaoxin and Liu, Hongyan and Zhou, Yuan and Lu, Qinglin and He, Jun}, title = {ActAvatar: Temporally-Aware Precise Action Control for Talking Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39983-39993} }
Enhancing Video Vision Language Model with Hippocampal Sensing-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Xu}, title = {Enhancing Video Vision Language Model with Hippocampal Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33682-33692} }
EgoMind: Activating Spatial Cognition through Linguistic Reasoning in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhenghao and Wang, Huiqun and Huang, Di}, title = {EgoMind: Activating Spatial Cognition through Linguistic Reasoning in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38616-38626} }
SPREAD: Spatial-Physical REasoning via geometry Aware Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Minzhang and Shao, Kuixiang and Li, Xuebing and Jiao, Yuyang and Bai, Yinuo and Zhou, Hengan and Shen, Sixian and Gu, Jiayuan and Yu, Jingyi}, title = {SPREAD: Spatial-Physical REasoning via geometry Aware Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31008-31018} }
SHands: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training-
[pdf]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Le and dos Santos, Thiago Freitas and Magnenat-Thalmann, Nadia and Wac, Katarzyna}, title = {SHands: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42879-42890} }
Changes in Real Time: Online Scene Change Detection with Multi-View Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galappaththige_2026_CVPR, author = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Suenderhauf, Niko and Miller, Dimity}, title = {Changes in Real Time: Online Scene Change Detection with Multi-View Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32246-32256} }
Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinshun and Li, Peiming and Wang, Ziyi and Fang, Zhongbin and Deng, Zhichao and Wu, Songtao and Li, Jason and Liu, Mengyuan}, title = {Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38335-38344} }
Controllable Federated Prompt Learning at Test Time-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Rui and Bai, Liang and Guo, Yanming and Ruan, Yirun and Yu, Tianyuan and Lu, Zhihe}, title = {Controllable Federated Prompt Learning at Test Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39455-39465} }
Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Sixian and Tang, Zhiwei and Chang, Tsung-Hui}, title = {Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36030-36039} }
Group Editing: Edit Multiple Images in One Go-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yue and Wang, Xinyu and Ma, Qianli and Wang, Qinghe and Zheng, Mingzhe and Yang, Xiangpeng and Li, Hao and Zhao, Chongbo and Ying, Jixuan and Yang, Harry and Liu, Hongyu and Chen, Qifeng}, title = {Group Editing: Edit Multiple Images in One Go}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43418-43428} }
HERO: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Tingting and Tao, Xinsong and Yin, Yufei and Tan, Min and Zhao, Sicheng and Yu, Zhou}, title = {HERO: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31867-31876} }
Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sahoo_2026_CVPR, author = {Sahoo, Aadarsh and Gkioxari, Georgia}, title = {Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39476-39485} }
Demo2Tutorial: From Human Experience to Multimodal Software Tutorials-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Zechen and Chen, Zhiheng and Lin, Yiqi and Lin, Kevin Qinghong and Gao, Difei and Guo, Xiangwu and Wang, Xin and Shou, Mike Zheng}, title = {Demo2Tutorial: From Human Experience to Multimodal Software Tutorials}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29588-29597} }
Reliable Clustering Number Estimation for Contrastive Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Cheng, Li and Nie, Jia and Min, Shiquan and Zhu, Jiangping}, title = {Reliable Clustering Number Estimation for Contrastive Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30162-30171} }
F$^2$-Assist: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination-
[pdf]
[bibtex]@InProceedings{Pu_2026_CVPR, author = {Pu, Bin and Liang, Xusheng and Ding, Xinpeng and Wu, Jinlin and Lei, Zhen and Li, Shengli and Li, Kenli and Ma, Jiawei}, title = {{F$^2$-Assist}: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35341-35350} }
Online3R: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shunkai and Yan, Zike and Xue, Fei and Wu, Dong and Deng, Yuchen and Zha, Hongbin}, title = {Online3R: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36529-36538} }
Linking Modality Isolation in Heterogeneous Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Changxing and Chao, Zichen and Chen, Siheng}, title = {Linking Modality Isolation in Heterogeneous Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39765-39774} }
Expand and Prune: Maximizing Trajectory Diversity for Effective GRPO in Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Shiran and Huang, Chenyi and Ai, Yuang and Fan, Qihang and Huang, Huaibo and He, Ran}, title = {Expand and Prune: Maximizing Trajectory Diversity for Effective GRPO in Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41913-41922} }
Disentangled Textual Priors for Diffusion-based Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Lei and Liu, Xin and Tong, Xinze and Li, Zhiliang and Liu, Jie and Tang, Jie and Wu, Gangshan}, title = {Disentangled Textual Priors for Diffusion-based Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38228-38237} }
NG-GS: NeRF-guided 3D Gaussian Splatting Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yi and Wang, Tao and Jin, Yi and Lang, Congyan and Li, Yidong and Ling, Haibin}, title = {NG-GS: NeRF-guided 3D Gaussian Splatting Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42061-42070} }
Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuqing and Zeng, Guotian and Yuan, Zhenqiao and He, Zhenyu and Li, Xin and Wang, Yaowei and Yang, Ming-Hsuan}, title = {Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35166-35176} }
Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yu and Liu, Yu and Wu, Zhong-Cheng and Cheng, Juan and Li, Huafeng and Chen, Xun}, title = {Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33848-33858} }
TUDSR: Twice Upsampling-Diffusion for Higher Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhiqiang and Dong, Yitong and Wei, Xian}, title = {TUDSR: Twice Upsampling-Diffusion for Higher Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38208-38217} }
TriLite: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sabaghi_2026_CVPR, author = {Sabaghi, Arian and Oramas, Jose}, title = {TriLite: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41386-41395} }
VLM-3R: Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Zhiwen and Zhang, Jian and Li, Renjie and Zhang, Junge and Chen, Runjin and Hu, Hezhen and Wang, Kevin and Wang, Peihao and Qu, Huaizhi and Zhou, Shijie and Wang, Dilin and Yan, Zhicheng and Xu, Hongyu and Theiss, Justin and Chen, Tianlong and Li, Jiachen and Tu, Zhengzhong and Wang, Zhangyang and Ranjan, Rakesh}, title = {VLM-3R: Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31054-31065} }
Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shilong and Xie, Xiurui and Zhan, Qiugang and Wang, Luochao and Deng, Yong and Liu, Guisong}, title = {Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34575-34585} }
FreeForm: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Donglai and Modi, Vismay and Dagli, Rishit and Trusty, Ty and Daviet, Gilles and Chen, Anka He and Sharp, Nicholas and Levin, David I. W.}, title = {FreeForm: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32475-32484} }
LEAD: Minimizing Learner-Expert Asymmetry in End-to-End Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Long and Fauth, Micha and Jaeger, Bernhard and Dauner, Daniel and Igl, Maximilian and Geiger, Andreas and Chitta, Kashyap}, title = {LEAD: Minimizing Learner-Expert Asymmetry in End-to-End Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39775-39785} }
PromptLoop: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Suhyeon and Ye, Jong Chul}, title = {PromptLoop: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41859-41869} }
DIMOS: Disentangling Instance-level Moving Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Hongxiang and Ren, Hongwei and Lin, Xiaopeng and Huang, Yulong and Xie, Zeke and Cheng, Bojun}, title = {DIMOS: Disentangling Instance-level Moving Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39806-39816} }
PrITTI: Primitive-based Generation of Controllable and Editable 3D Semantic Urban Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tze_2026_CVPR, author = {Tze, Christina Ourania and Dauner, Daniel and Liao, Yiyi and Tsishkou, Dzmitry and Geiger, Andreas}, title = {PrITTI: Primitive-based Generation of Controllable and Editable 3D Semantic Urban Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32613-32624} }
DSCA: Dynamic Subspace Concept Alignment for Lifelong VLM Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2026_CVPR, author = {Das, Gyanendra and Jena, Sai}, title = {DSCA: Dynamic Subspace Concept Alignment for Lifelong VLM Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40772-40781} }
BiGain: Unified Token Compression for Joint Generation and Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiacheng and Tang, Shengkun and Cui, Jiacheng and Xu, Dongkuan and Shen, Zhiqiang}, title = {BiGain: Unified Token Compression for Joint Generation and Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31931-31940} }
HAWK: Head Importance-Aware Visual Token Pruning in Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Qihui and Zhang, Tao and Wang, Yuchen and Chen, Shuangwu and Tan, Xiaobin and Yang, Jian and Liu, Yang and Pan, Yinfei}, title = {HAWK: Head Importance-Aware Visual Token Pruning in Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39583-39592} }
EthoCLIP: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Yinuo and Wu, Jinyan and Yang, Zixi and Liang, Kongming and Zhu, Xiatian and Ma, Zhanyu}, title = {EthoCLIP: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31196-31206} }
MapReduce LoRA: Advancing the Pareto Front in Multi-Preference Optimization for Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Chieh-Yun and Wang, Zhonghao and Chen, Qi and Ye, Zhifan and Shi, Min and Zhao, Yue and Zhao, Yinan and Qu, Hui and Lin, Wei-An and Shen, Yiru and Kale, Ajinkya and Essa, Irfan and Shi, Humphrey}, title = {MapReduce LoRA: Advancing the Pareto Front in Multi-Preference Optimization for Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34375-34384} }
Agentic Retoucher for Text-To-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Shaocheng and Liang, Jianfeng and Cai, Chunlei and Geng, Cong and Duan, Huiyu and Zhang, Xiaoyun and Hu, Qiang and Zhai, Guangtao}, title = {Agentic Retoucher for Text-To-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29114-29125} }
Sensor2Sensor: Cross-Embodiment Sensor Conversion for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Sun, Bo and Bai, Yijing and Casser, Vincent and Peng, Songyou and Zhu, Zehao and Shih, Meng-Li and Masotto, Xander and Su, Shih-Yang and Parvate, Kanaad and Ge, Tiancheng and Bieske, Linn and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max}, title = {Sensor2Sensor: Cross-Embodiment Sensor Conversion for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32093-32102} }
RL-ScanIQA: Reinforcement-Learned Scanpaths for Blind 360deg Image Quality Assessment-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yujia and Li, Yuyan and Liu, Jiuming and Zhang, Fang-Lue and Zheng, Xinhu and Dodgson, Neil A.}, title = {RL-ScanIQA: Reinforcement-Learned Scanpaths for Blind 360deg Image Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37401-37412} }
Wavelet-Driven 3D Anomaly Detection under Pose-Agnostic and Sparse-View-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Mingwen and Zhang, Qiao and Chen, Xinyuan and Lv, Xiang and Meng, Lingzhuang and Liu, Chang and Zhan, Qinglin and Jian, Ling}, title = {Wavelet-Driven 3D Anomaly Detection under Pose-Agnostic and Sparse-View}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43083-43092} }
OmniLottie: Generating Vector Animations via Parameterized Lottie Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yiying and Cheng, Wei and Chen, Sijin and Fu, Honghao and Zeng, Xianfang and Cai, Yujun and Yu, Gang and Ma, Xingjun}, title = {OmniLottie: Generating Vector Animations via Parameterized Lottie Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39293-39303} }
GeneVAR: Causal MeanFlow for Autoregressive Gene-to-WSI Tile Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianwei and Yang, Fan and Li, Xin and Zhai, Qiang and Luo, Ao and Ren, Ziqi and Jiao, Zhicheng and Cheng, Hong}, title = {GeneVAR: Causal MeanFlow for Autoregressive Gene-to-WSI Tile Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34116-34125} }
Bilevel Layer-Positioning LoRA for Real Image Dehazing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yan and Ma, Long and Feng, Yuxin and Huang, Zhe and Zhou, Fan and Su, Zhuo}, title = {Bilevel Layer-Positioning LoRA for Real Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29969-29978} }
TrajTok: Learning Trajectory Tokens Enhances Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Chenhao and Zhang, Jieyu and Zhang, Jianing and Huang, Weikai and Kumar, Ashutosh and Kong, Quan and Tuzel, Oncel and Li, Chun-Liang and Krishna, Ranjay}, title = {TrajTok: Learning Trajectory Tokens Enhances Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31207-31218} }
3D-VCD: Hallucination Mitigation in 3D-LLM Embodied Agents through Visual Contrastive Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ogunleye_2026_CVPR, author = {Ogunleye, Makanjuola Adekunmi and Abdelrahman, Eman and Lourentzou, Ismini}, title = {3D-VCD: Hallucination Mitigation in 3D-LLM Embodied Agents through Visual Contrastive Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40197-40207} }
SAT-RRG: LLM-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yunyi and Li, Yingshu and Chen, Tong and Liu, Lingqiao and Wang, Lei and Zhou, Luping}, title = {SAT-RRG: LLM-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35363-35372} }
dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Yi and Luo, Siqi and Xu, Tianxiang and Qin, Qi and Chen, Haoxing and Zhu, Kaiwen and Zhang, Zhiwei and He, Yangfan and Zhang, Rongchao and Bai, Jinbin and Cao, Shuo and Fu, Bin and He, Junjun and Liu, Yihao and Cao, Yuewen and Liu, Xiaohong}, title = {dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35726-35735} }
Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Kun and Liu, Yuanxin and Yao, Linli and Cai, Yishuo and Zhou, Hao and Meng, Fandong and Zhou, Jie and Sun, Xu}, title = {Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41089-41099} }
Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Jiawei and Wang, Shigeng and Li, Chao and Liu, Xiaolong and Yao, Anbang}, title = {Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34491-34501} }
Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Yuxuan and Bai, Weimin and Wang, Yifei and Luo, Weijian and Sun, He}, title = {Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41881-41891} }
ZoomEarth: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ruixun and Fu, Bowen and Song, Jiayi and Li, Kaiyu and Li, Wanchen and Xue, Lanxuan and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong}, title = {ZoomEarth: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34877-34888} }
IAFMNet: Information-Aware Feature Modulation for Efficient Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Junwei and Liu, Mengzu and Wang, Zhenyu and Wu, Fangfang and Wu, Sijia and Huang, Tao and Dong, Weisheng}, title = {IAFMNet: Information-Aware Feature Modulation for Efficient Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30564-30573} }
PortraitDirector: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Chaonan and Qi, Jinwei and Xu, Sheng and Zhang, Peng and Zhang, Bang}, title = {PortraitDirector: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32377-32388} }
FlashVSR: Towards Real-time Diffusion-Based Streaming Video Super Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Junhao and Guo, Shi and Cai, Xin and Li, Xiaohui and Liu, Yihao and Yuan, Chun and Xue, Tianfan}, title = {FlashVSR: Towards Real-time Diffusion-Based Streaming Video Super Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43482-43493} }
R-C2: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zirui and Dong, Haoyu and Pei, Kexin and Mao, Chengzhi}, title = {R-C2: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36893-36903} }
Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kai and Zhou, Tao and Lei, Jiayi and Wang, Jing and Zhao, Jinman and Pian, Weiguo and Cheng, Yuan and Tian, Yapeng and Gao, Peng and Fu, Bin and Liu, Yihao and Hatzinakos, Dimitrios and Cao, Yuewen}, title = {Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43396-43406} }
ORD: Object-Relation Decoupling for Generalized 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Ronggang and Meng, Fansen and Zhang, Huaidong and Xu, Xuemiao}, title = {ORD: Object-Relation Decoupling for Generalized 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30964-30973} }
UCMNet: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Daehyun and Kim, Youngmin and Oh, Yoon Ju and Kim, Tae Hyun}, title = {UCMNet: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29939-29948} }
PARSE: Part-Aware Relational Spatial Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Yinuo and Xu, Peijun and Shao, Kuixiang and Jiao, Yuyang and Zhang, Jingxuan and Yao, Kaixin and Gu, Jiayuan and Yu, Jingyi}, title = {PARSE: Part-Aware Relational Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38700-38710} }
SDTrack: A Baseline for Event-based Tracking via Spiking Neural Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Yimeng and Ren, Zhenbang and Wu, Haodi and Wei, Wenjie and Zhu, Rui-Jie and Wang, Shuai and Zhang, Dehao and Xiao, Yichen and Zhang, Jieyuan and Shi, Kexin and Wang, Jingzhinan and Eshraghian, Jason K. and Qu, Haicheng and Zhang, Malu}, title = {SDTrack: A Baseline for Event-based Tracking via Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36245-36254} }
UniPercept: A Unified Diffusion Model for Generalizable Visual Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zuyan and He, Zhenliang and Kan, Meina and Shan, Shiguang and Chen, Xilin}, title = {UniPercept: A Unified Diffusion Model for Generalizable Visual Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43175-43186} }
BrepVGAE: Variational Graph Autoencoder with Unified Latent Representation for B-rep-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Hao and Deng, Liyuan and Dai, Yongkang and Wang, Ruohan and Li, Jiahao and Bai, Yunpeng and Shi, Yilei}, title = {BrepVGAE: Variational Graph Autoencoder with Unified Latent Representation for B-rep}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39230-39238} }
DynFusion: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zheng and Xiang, Lichuan and Cai, Xu and Wang, Bing and Yang, Bo and Wen, Hongkai}, title = {DynFusion: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29103-29113} }
LongVT: Incentivizing "Thinking with Long Videos" via Native Tool Calling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zuhao and Wang, Sudong and Zhang, Kaichen and Wu, Keming and Leng, Sicong and Zhang, Yifan and Li, Bo and Qin, Chengwei and Lu, Shijian and Li, Xingxuan and Bing, Lidong}, title = {LongVT: Incentivizing ``Thinking with Long Videos'' via Native Tool Calling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33816-33826} }
Elucidating the SNR-t Bias of Diffusion Probabilistic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Meng and Sun, Lei and Zeng, Jianhao and Chu, Xiangxiang and Zhan, Kun}, title = {Elucidating the SNR-t Bias of Diffusion Probabilistic Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43461-43470} }
ChArtist: Generating Pictorial Charts with Unified Spatial and Subject Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Shishi and Zhou, Tongyu and Laidlaw, David H. and Chan, Gromit Yeuk-Yin}, title = {ChArtist: Generating Pictorial Charts with Unified Spatial and Subject Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29211-29221} }
Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Yuyang and Li, Yongzhi and Li, Jiahui and Mu, Yadong and Chen, Quan and Jiang, Peng}, title = {Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43429-43439} }
Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yaze and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41605-41615} }
Structural Action Transformer for 3D Dexterous Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Xiaohan and Wang, Min and Weng, Bohong and Zhou, Wengang and Li, Houqiang}, title = {Structural Action Transformer for 3D Dexterous Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28807-28818} }
SECOS: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hezhao and Yang, Jiacheng and Gao, Junlong and Li, Mengke and Zhang, Yiqun and Gowda, Shreyank N. and Lu, Yang}, title = {SECOS: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39627-39636} }
Mechanisms of Object Localization in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Schaumloffel_2026_CVPR, author = {Schauml{\"o}ffel, Timothy and Vilas, Martina G. and Roig, Gemma}, title = {Mechanisms of Object Localization in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31356-31365} }
Bridge: Basis-Driven Causal Inference Marries VFMs for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Mingbo and Liu, Feng and Gevaert, Caroline and Vosselman, George and Cheng, Hao}, title = {Bridge: Basis-Driven Causal Inference Marries VFMs for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31962-31973} }
Video-as-Answer: Predict and Generate Next Video Event with Joint-GRPO-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Junhao and Hou, Liang and Tao, Xin and Liao, Jing}, title = {Video-as-Answer: Predict and Generate Next Video Event with Joint-GRPO}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38915-38925} }
Refer-Agent: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haichao and Liang, Tianming and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Refer-Agent: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39508-39517} }
KVSmooth: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Siyu and Chen, Feiyang and Zhang, Xiaojin and He, Kun}, title = {KVSmooth: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32561-32571} }
PaCo-RL: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ping_2026_CVPR, author = {Ping, Bowen and Jia, Chengyou and Luo, Minnan and Xia, Changliang and Shen, Xin and Dang, Zhuohang and Qian, Hangwei}, title = {PaCo-RL: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34353-34363} }
Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shan and Shen, Maying and Chang, Nadine and Nguyen, Chuong and Li, Hongdong and Alvarez, Jose M.}, title = {Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32539-32549} }
INSIGHT Bench: Towards Grounded IN-SItu Guidance for Robotic ManipulaTion-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seonho and Hong, Junhyeong and Lee, Kyungjae and Oh, Yoonseon}, title = {INSIGHT Bench: Towards Grounded IN-SItu Guidance for Robotic ManipulaTion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35070-35079} }
FSLoRA: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Yanling and Zhang, Shanshan and Chen, Di and Yang, Jian}, title = {FSLoRA: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40428-40437} }
Concept-Aware LoRA for Domain-Aligned Segmentation Dataset Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Minho and Park, Sunghyun and Lee, Jungsoo and Park, Hyojin and Hwang, Kyuwoong and Porikli, Fatih and Choo, Jaegul and Choi, Sungha}, title = {Concept-Aware LoRA for Domain-Aligned Segmentation Dataset Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39858-39868} }
VISion On Request: Enhanced VLLM efficiency with sparse, dynamically selected, vision-language interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bulat_2026_CVPR, author = {Bulat, Adrian and Baldrati, Alberto and Metaxas, Ioannis Maniadis and Ouali, Yassine and Tzimiropoulos, Georgios}, title = {VISion On Request: Enhanced VLLM efficiency with sparse, dynamically selected, vision-language interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31920-31930} }
HiLoRA: Hierarchical Low-Rank Adaptation for Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zihao and Zou, Nan and Zeng, Jiandian and Li, Guo and Chen, Ke and Li, Boyuan and Wang, Tian}, title = {HiLoRA: Hierarchical Low-Rank Adaptation for Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31746-31757} }
TrafficAlign: Aligning Large Language Models for Traffic Scenario Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Zhi and Niu, Liangkun and Zhang, Tianyi}, title = {TrafficAlign: Aligning Large Language Models for Traffic Scenario Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39744-39754} }
Reward Sharpness-Aware Fine-Tuning for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Kwanyoung and Sim, Byeongsu}, title = {Reward Sharpness-Aware Fine-Tuning for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36051-36061} }
SGDE: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yuqiao and Liu, Xiaoyan and Mao, Jianxu and Wang, Yaonan and Zhang, Hui and Liu, Lizhu and Chen, Yurong and He, Wenbin}, title = {SGDE: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34084-34094} }
MindPower: Enabling Theory-of-Mind Reasoning in VLM-based Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ruoxuan and Zheng, Qiyun and Zhou, Zhiyu and Liao, Ziqi and Wu, Siyu and Jiang-Lin, Jian-Yu and Wen, Bin and Xie, Hongxia and Fu, Jianlong and Cheng, Wen-Huang}, title = {MindPower: Enabling Theory-of-Mind Reasoning in VLM-based Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29631-29641} }
Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zitang and Yoshimura, Masakazu and Otsuka, Junji and Irie, Atsushi and Ohashi, Takeshi}, title = {Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32787-32797} }
HierAmp: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Lin and Jiang, Xinru and Xiao, Xi and Fan, Qihui and Lu, Lei and Wang, Yanzhi and Lin, Xue and Camps, Octavia and Zhao, Pu and Gu, Jianyang}, title = {HierAmp: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41688-41698} }
Leveraging Class Distributions in CLIP for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin}, title = {Leveraging Class Distributions in CLIP for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34714-34723} }
Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Rui and Shao, Shitong and Zhou, Zikai and Zhao, Pukun and Guo, Hangyu and Ye, Tian and Bai, Lichen and Yang, Shuo and Xie, Zeke}, title = {Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43621-43631} }
Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Le and Huang, Yan and Xu, Zhen and Xu, Yong and Wong, Hau-San and Wu, Si}, title = {Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35607-35616} }
Neural Mixture Density Processes-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Yi and Tao, Qi and Liang, Xingxing and Zhang, Longfei and Lv, Yiqin and Song, Weitao and Yang, Fangjie and Wang, Cheems and Cheng, Guangquan}, title = {Neural Mixture Density Processes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39680-39690} }
Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Guojun and Zhang, Mingyang and Xiang, Jianwen and Tan, Cheng and Yang, Yanchao and Zhou, Junwei}, title = {Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33662-33671} }
FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Yucheng and Liang, Jiajun and Cui, Kaiqian and Zhao, Baoquan and Xie, Haoran and Liu, Wei and Li, Qing and Mao, Xudong}, title = {FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43525-43535} }
Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jianting and Yu, Dianzhi and King, Irwin}, title = {Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39945-39961} }
Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yawen and Li, Feng and Kong, Shuqi and Diao, Yunfeng and Gao, Xinjian and Shi, Zenglin and Wang, Meng}, title = {Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38111-38121} }
PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Ziqiao and Wang, Qichao and Dou, Zhiyang and Song, Zixing and Zhou, Zhipeng and King, Irwin and Zhao, Peilin}, title = {PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31450-31461} }
RankOOD - Class Ranking-based Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Denipitiyage_2026_CVPR, author = {Denipitiyage, Dishanika and Karunanayake, Naveen and Seneviratne, Suranga and Chawla, Sanjay}, title = {RankOOD - Class Ranking-based Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42280-42289} }
SASNet: Spatially-Adaptive Sinusoidal Networks for INRs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Haoan and Aldana, Diana and Novello, Tiago and De Floriani, Leila}, title = {SASNet: Spatially-Adaptive Sinusoidal Networks for INRs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41964-41973} }
IncreFA: Breaking the Static Wall of Generative Model Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Haotian and Chang, Dongliang and Gao, Yueying and Tan, Yuexuan and Chen, Lei and Ma, Zhanyu}, title = {IncreFA: Breaking the Static Wall of Generative Model Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35405-35415} }
Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Han and Min, Nan and Liu, Xiaotong and Chen, Wendi and Fang, Yuan and Lv, Jun and Lu, Cewu and Wen, Chuan}, title = {Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35059-35069} }
BAMI: Training-Free Bias Mitigation in GUI Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Borui and Zhang, Bo and Wang, Bo and Zheng, Wenzhao and Cheng, Yuhao and Tang, Liang and Yan, Yiqiang and Zhou, Jie and Lu, Jiwen}, title = {BAMI: Training-Free Bias Mitigation in GUI Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34596-34605} }
Cinematic Audio Source Separation Using Visual Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kang and Lee, Suyeon and Senocak, Arda and Chung, Joon Son}, title = {Cinematic Audio Source Separation Using Visual Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37874-37884} }
GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Ning and Ge, Zhenyu and Han, Feng and Sun, Yuhua and Li, Chengqing and Chen, Jingjing}, title = {GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43536-43545} }
NeAR: Coupled Neural Asset-Renderer Stack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hong and Ye, Chongjie and Chen, Houyuan and Xiao, Weiqing and Yan, Ziyang and Xiao, Lixing and Chen, Zhaoxi and Xiang, Jianfeng and Xu, Shaocong and Liu, Xuhui and Wang, Yikai and Zhang, Baochang and Han, Xiaoguang and Yang, Jiaolong and Zhao, Hao}, title = {NeAR: Coupled Neural Asset-Renderer Stack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29834-29844} }
ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Kaiwen and Cui, Jinkai and Zhang, Juyong}, title = {ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32518-32527} }
Scene Grounding in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Cohen_2026_CVPR, author = {Cohen, Tamir and Segre, Leo and Shomer-Chai, Shay and Avidan, Shai and Averbuch-Elor, Hadar}, title = {Scene Grounding in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33353-33363} }
All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and He, Mengqi and Tu, Peter and Zhang, Jing}, title = {All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33488-33498} }
MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Yijun and Zhu, Shipeng and Zuo, Ruijia and Nie, Na and Xue, Hui}, title = {MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38722-38731} }
GM-R^2: Generative Matching Learning for Unsupervised Geometric Representation and Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haobo and Yu, Liang and Zheng, Jianmin}, title = {{GM-R$^2$}: Generative Matching Learning for Unsupervised Geometric Representation and Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31430-31439} }
Low-Rank Residual Diffusion Models-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Junfu and Yuan, Jiang}, title = {Low-Rank Residual Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35747-35757} }
Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Turkulainen_2026_CVPR, author = {Turkulainen, Matias and Krishnan, Akshay and Aleotti, Filippo and Sayed, Mohamed and Garcia-Hernando, Guillermo and Kannala, Juho and Solin, Arno and Brostow, Gabriel and Turmukhambetov, Daniyar}, title = {Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40959-40970} }
Chain of Event-Centric Causal Thought for Physically Plausible Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zixuan and Hu, Yixin and Wang, Haolan and Chen, Feng and Liu, Yan and Li, Wen and Lei, Yinjie}, title = {Chain of Event-Centric Causal Thought for Physically Plausible Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38122-38131} }
SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Wenjie and Ding, Tianle and Yang, Wenfei and Zhang, Tianzhu}, title = {SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40876-40885} }
MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Junha and Jo, Yongsik and Min, So Yeon and Xie, Quanting and Kim, Taehwan and Bisk, Yonatan and Choo, Jaegul}, title = {MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30929-30940} }
Thermally Activated Dual-Modal Adversarial Clothing against AI Surveillance Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Jiahuan and Jiang, Tingsong and Liu, Hanqing and Ma, Chao and Zhou, Weien and Yang, Yang and Yao, Wen}, title = {Thermally Activated Dual-Modal Adversarial Clothing against AI Surveillance Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34929-34939} }
Zero-shot Detection of AI-Generated Image via RAW-RGB Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haiwei and Li, Fengpeng and Tu, Zhilin and Li, Yuanman and Li, Xiong and Zhou, Jiantao}, title = {Zero-shot Detection of AI-Generated Image via RAW-RGB Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42997-43007} }
Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaolei and Wang, Yuexin and Dai, Tianhong and Bai, Huihui and Zhao, Yao and Xiao, Jimin}, title = {Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43093-43102} }
MedKCO: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chenran and Wu, Ruiqi and Zhou, Tao and Zhou, Yi}, title = {MedKCO: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35260-35269} }
BIT: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Haoxuan and Niu, Guanglin}, title = {BIT: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40386-40396} }
TeamHOI: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lionar_2026_CVPR, author = {Lionar, Stefan and Lee, Gim Hee}, title = {TeamHOI: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37121-37132} }
Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Zhuwei and Xia, Zimin and Chen, He and Yue, Linwei and Zheng, Xianwei}, title = {Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34832-34842} }
SAQN: Semantic-based Adaptive Query Network for 3D Referring Expression Segmentation-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiale and Wang, Shangfei}, title = {SAQN: Semantic-based Adaptive Query Network for 3D Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38627-38636} }
Making the Classification Explanation Faithful to the Confidence Score-
[pdf]
[supp]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Jian-Xun and Pan, Lu and Li, Weisheng}, title = {Making the Classification Explanation Faithful to the Confidence Score}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38959-38968} }
Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pramanik_2026_CVPR, author = {Pramanik, Vishal and Maliha, Maisha and Jha, Susmit and Velasquez, Alvaro and Kotevska, Olivera and Jha, Sumit Kumar}, title = {Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31662-31671} }
VT-Intrinsic: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Zeqing and Ramanagopal, Mani and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.}, title = {VT-Intrinsic: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41752-41761} }
Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical 4D World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuzhi and Wen, Kairun and Gao, Rongxin and Liu, Dongxuan and Lou, Yibin and Wu, Jie and Xu, Jing and Zhang, Jian and Yang, Zheng and Lin, Yunlong and Li, Chenxin and Pan, Panwang and Lu, Junbin and Jiang, Jingyan and Ding, Xinghao and Huang, Yue and Wang, Zhi}, title = {Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical 4D World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33446-33456} }
PHAC: Promptable Human Amodal Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noh_2026_CVPR, author = {Noh, Seung Young and Chang, Ju Yong}, title = {PHAC: Promptable Human Amodal Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30750-30760} }
Outlier-Robust Diffusion Solvers for Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yang and Liu, Jiahua and Pang, Tongyao and Li, Wen and Liu, Zhaoqiang}, title = {Outlier-Robust Diffusion Solvers for Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30782-30791} }
C^2FG: Control Classifier-Free Guidance via Score Discrepancy Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jiayang and Zheng, Tianyi and Zou, Jiayang and Yang, Fengxiang and Liu, Shice and Fan, Luyao and Zhang, Zheyu and Zhang, Hao and Chen, Jinwei and Jiang, Peng-Tao and Li, Bo and Wang, Jia}, title = {{C$^2$FG}: Control Classifier-Free Guidance via Score Discrepancy Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34398-34407} }
Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aydin_2026_CVPR, author = {Aydin, M. Kerem and Saragadam, Vishwanath and Alexander, Emma}, title = {Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29845-29854} }
RHO: Robust Holistic OSM-Based Metric Cross-View Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Junwei and Dai, Ruize and Liu, Ruiping and Zeng, Zichao and Chen, Yufan and Wang, Fangjinhua and Peng, Kunyu and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer}, title = {RHO: Robust Holistic OSM-Based Metric Cross-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33727-33737} }
Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hayeon and Jang, Ji Ha and Kim, Junghun James and Chun, Se Young}, title = {Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36861-36870} }
Self-Critical Distillation Network for Video-based Commonsense Captioning-
[pdf]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Mengqi and Jia, Gengyun and Bao, Bing-Kun}, title = {Self-Critical Distillation Network for Video-based Commonsense Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40527-40536} }
Inter-Edit: First Benchmark for Interactive Instruction-Based Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Delong and Hou, Haotian and Hou, Zhaohui and Huang, Zhiyuan and Han, Shihao and Zhan, Mingjie and Zhao, Zhicheng and Su, Fei}, title = {Inter-Edit: First Benchmark for Interactive Instruction-Based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37290-37300} }
Guiding Diffusion Models with Semantically Degraded Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shilong and Zhang, Yuming and Wang, Hongxia}, title = {Guiding Diffusion Models with Semantically Degraded Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43653-43663} }
Meta-CoT: Enhancing Granularity and Generalization in Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shiyi and Cheng, Yiji and Hang, Tiankai and Yin, Zijin and He, Runze and Xu, Yu and Dai, Wenxun and Lin, Yunlong and Wang, Chunyu and Lu, Qinglin and Tang, Yansong}, title = {Meta-CoT: Enhancing Granularity and Generalization in Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38004-38015} }
AutoCut: End-to-end advertisement video editing based on multimodal discretization and controllable generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Milton and Qin, Sizhong and Li, Yongzhi and Chen, Quan and Jiang, Peng}, title = {AutoCut: End-to-end advertisement video editing based on multimodal discretization and controllable generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37777-37787} }
TempoControl: Temporal Attention Guidance for Text-to-Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schiber_2026_CVPR, author = {Schiber, Shira and Lindenbaum, Ofir and Schwartz, Idan}, title = {TempoControl: Temporal Attention Guidance for Text-to-Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36670-36679} }
PanoVGGT: Feed-Forward 3D Reconstruction from Panoramic Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yijing and Chao, Mengjun and Wang, Luo and Zhao, Tianyang and Dai, Haizhao and Zhang, Yingliang and Yu, Jingyi and Shi, Yujiao}, title = {PanoVGGT: Feed-Forward 3D Reconstruction from Panoramic Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36444-36453} }
NavForesee: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Fei and Xie, Shichao and Luo, Minghua and Chu, Zedong and Hu, Junjun and Wu, Xiaolong and Xu, Mu}, title = {NavForesee: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32431-32440} }
RealAppliance: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals-
[pdf]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuzheng and Long, Yuxing and Kang, Lei and Guo, Yuchong and Yu, Ziyan and Mao, Shangqing and Zhang, Jiyao and Wu, Ruihai and Li, Dongjiang and Shen, Hui and Dong, Hao}, title = {RealAppliance: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37186-37194} }
Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Shi, Yuzhu and Zhou, Kan and Zhang, Shu and He, Diqi and Zhang, Dingwen and Han, Junwei}, title = {Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32334-32344} }
FINE: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yucheng and Feng, Fu and Shi, Ruixiao and Shen, Jianlu and Wang, Jing and Rui, Yong and Geng, Xin}, title = {FINE: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42018-42028} }
OctoNav: Towards Generalist Embodied Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Chen and Jin, Liankai and Peng, Xingyu and Zhang, Jiazhao and Deng, Yue and Li, Annan and Wang, He and Liu, Si}, title = {OctoNav: Towards Generalist Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40074-40084} }
Semantic Alignment for Pose-Invariant Identity Preserving Diffusion-
[pdf]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jiwon and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan}, title = {Semantic Alignment for Pose-Invariant Identity Preserving Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43363-43372} }
Minerva-Ego: Spatiotemporal Hints for Egocentric Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Nagrani_2026_CVPR, author = {Nagrani, Arsha and Uijlings, Jasper and Buch, Shyamal and Weyand, Tobias and Vijayanarasimhan, Sudheendra and Hu, Bo and Mehran, Ramin and Ross, David A. and Schmid, Cordelia}, title = {Minerva-Ego: Spatiotemporal Hints for Egocentric Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38859-38869} }
CaReFlow: Cyclic Adaptive Rectified Flow for Multimodal Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Sijie and Han, Shiqin}, title = {CaReFlow: Cyclic Adaptive Rectified Flow for Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37799-37809} }
RAP: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient 3D Gaussian Splatting Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Kaifa and Yang, Qi and Xu, Yiling and Li, Zhu}, title = {RAP: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient 3D Gaussian Splatting Processing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33323-33332} }
Sketch2CT: Multimodal Diffusion for Structure-Aware 3D Medical Volume Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Delin and Wang, Chaoli}, title = {Sketch2CT: Multimodal Diffusion for Structure-Aware 3D Medical Volume Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37600-37610} }
Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiaze and Xia, Shiyu and Lv, Jiaqi and Geng, Xin}, title = {Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34502-34511} }
UniChange: Unifying Change Detection with Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xu and Li, Danyang and Dong, Xiaohang and Wu, Tianhao and Yu, Hualong and Wang, Jianye and Li, Qicheng and Li, Xiang}, title = {UniChange: Unifying Change Detection with Multimodal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42169-42179} }
ChartR: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaojun and Luo, Sixiao and Liu, Ziqi and Yang, Min and Zhang, Qin and Zhang, Liang-Jie}, title = {ChartR: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41193-41202} }
Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Liang, Dong and Li, Fanding and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo}, title = {Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30833-30842} }
VIVA: VLM-Guided Instruction-Based Video Editing with Reward Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Xiaoyan and Yang, Haotian and Wang, Angtian and Wang, Yizhi and Yang, Yiding and Zhang, Canyu and Ma, Chongyang}, title = {VIVA: VLM-Guided Instruction-Based Video Editing with Reward Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34364-34374} }
Adaptive 3D Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning-
[pdf]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Shenghai and Yihan, Wei and Yee, Jason and Qiao, Zhuoran and Lou, Boyang and Hu, Enwen}, title = {Adaptive 3D Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39064-39074} }
FluxMem: Adaptive Hierarchical Memory for Streaming Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yiweng and He, Bo and Wang, Junke and Zheng, Xiangyu and Ye, Ziyi and Wu, Zuxuan}, title = {FluxMem: Adaptive Hierarchical Memory for Streaming Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31272-31282} }
Adapting In-context Generation for Enhanced Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haiwen and Chen, Zining and Liu, Delong and Hou, Zhaohui and Zhao, Zhicheng and Su, Fei}, title = {Adapting In-context Generation for Enhanced Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29167-29177} }
Focus on Background: Exploring SAM's Potential in Few-shot Medical Image Segmentation with Background-centric Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Yuntian and Zhu, Yazhou and Koniusz, Piotr and Zhang, Haofeng}, title = {Focus on Background: Exploring SAM's Potential in Few-shot Medical Image Segmentation with Background-centric Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30032-30041} }
CoD: A Diffusion Foundation Model for Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Zhaoyang and Zheng, Zihan and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Li, Houqiang and Lu, Yan}, title = {{CoD}: A Diffusion Foundation Model for Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38420--38429} }
CRFT: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xuecong and Ding, Mengzhu and Sun, Zixuan and Li, Zhang and Teng, Xichao}, title = {{CRFT}: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34784--34794} }
Hierarchical Visual Relocalization with Nearest View Synthesis from Feature Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Huaqi and Liu, Bingxi and Chen, Guangcheng and Tang, Fulin and He, Li and Zhang, Hong}, title = {Hierarchical Visual Relocalization with Nearest View Synthesis from Feature {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40981--40991} }
Unleashing Vision-Language Semantics for Deepfake Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jiawen and Miao, Yunqi and Zhang, Xueyi and Deng, Jiankang and Pang, Guansong}, title = {Unleashing Vision-Language Semantics for Deepfake Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42953--42963} }
Uni-Hema: Unified Model for Digital Hematopathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rehman_2026_CVPR, author = {Rehman, Abdul and Rasool, Iqra and Imran, Ayisha and Ali, Mohsen and Sultani, Waqas}, title = {{Uni-Hema}: Unified Model for Digital Hematopathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37578--37589} }
Breaking the Continuum: Discrete Distribution Learning for Structural MRI Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Tianle and Mo, Mengjingcheng and Wen, Ting and Song, Zhen and Xiong, Zinan and Zhu, Yanjie}, title = {Breaking the Continuum: Discrete Distribution Learning for Structural {MRI} Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37568--37577} }
SpaceDrive: Infusing Spatial Awareness into VLM-based Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Peizheng and Zhang, Zhenghao and Holtz, David and Yu, Hang and Yang, Yutong and Lai, Yuzhi and Song, Rui and Geiger, Andreas and Zell, Andreas}, title = {{SpaceDrive}: Infusing Spatial Awareness into {VLM}-based Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40096--40107} }
CodeMMR: Bridging Natural Language, Code, and Image for Unified Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Jiahui and Li, Qing and Cai, Fengyu and Karray, Fakhri}, title = {{CodeMMR}: Bridging Natural Language, Code, and Image for Unified Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38742--38752} }
RemedyGS: Defend 3D Gaussian Splatting Against Computation Cost Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yanping and Liu, Zhening and Li, Zijian and Lin, Zehong and Zhang, Jun}, title = {{RemedyGS}: Defend {3D} {Gaussian} Splatting Against Computation Cost Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33227--33236} }
Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Lan_2026_CVPR, author = {Lan, Kaiyang and Cui, Ying and Jing, Chenchen and Zheng, Jianwei and Guo, Dongyan}, title = {Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42604--42614} }
Benchmarking Endoscopic Surgical Image Restoration and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2026_CVPR, author = {Pei, Jialun and Guo, Diandian and Yang, Donghui and Li, Zhixi and Feng, Yuxin and Ma, Long and Du, Bo and Heng, Pheng-Ann}, title = {Benchmarking Endoscopic Surgical Image Restoration and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37413--37422} }
EagleVision: A Dual-Stage Framework with BEV-grounding-based Chain-of-Thought for Spatial Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Jiaxu and Wang, Xu and Xie, Mengwei and Zhang, Hang and Xu, Mu and Han, Yang and Yuan, Ding and Zhang, Hong and Yang, Yifan}, title = {{EagleVision}: A Dual-Stage Framework with {BEV}-grounding-based Chain-of-Thought for Spatial Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38637--38646} }
StableMTL: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Anh-Quan and Lopes, Ivan and de Charette, Raoul}, title = {{StableMTL}: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37788--37798} }
VideoChat-M1: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Boyu and Wang, Zikang and Yue, Zhengrong and Yan, Kainan and Yu, Chenyun and Huang, Yi and Liu, Zijun and Wen, Yafei and Chen, Xiaoxin and Liu, Yang and Li, Peng and Wang, Yali}, title = {{VideoChat-M1}: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33772--33783} }
ArchSym: Detecting 3D-Grounded Architectural Symmetries in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hanyu and Cai, Ruojin and Marschner, Steve and Snavely, Noah}, title = {{ArchSym}: Detecting {3D}-Grounded Architectural Symmetries in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36561--36570} }
Teaching DINOv3 About Partial 3D Geometry: A Self-Supervised Geometry-Aware Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Ehm_2026_CVPR, author = {Ehm, Viktoria and Cao, Dongliang and Marin, Riccardo and Scholz, Daniel and Wang, Weikang and Bernard, Florian and Cremers, Daniel}, title = {Teaching {DINOv3} About Partial {3D} Geometry: A Self-Supervised Geometry-Aware Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42071--42081} }
Multi-modal Frequency Decomposition Network for Semantic Scene Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Die and Wang, Lubo and Liu, Ruonan and Guo, Qing and Wang, Chong and Wu, Dongdong and Feng, Wei and Yang, Kairui and Lin, Di}, title = {Multi-modal Frequency Decomposition Network for Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41531--41540} }
Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Shule and Zhang, Zetian and Ma, Xiao and Ji, Zexuan}, title = {Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32714--32723} }
SD-FSMIS: Adapting Stable Diffusion for Few-Shot Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Meihua and Zhang, Yang and He, Weizhao and Qu, Hu and Li, Yisong}, title = {{SD-FSMIS}: Adapting {Stable Diffusion} for Few-Shot Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29979--29989} }
The Invisible Gorilla Effect in Out-of-distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Anthony_2026_CVPR, author = {Anthony, Harry and Liang, Ziyun and Warr, Hermione and Kamnitsas, Konstantinos}, title = {The Invisible Gorilla Effect in Out-of-distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39314--39325} }
Learning by Analogy: A Causal Framework for Compositional Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Lingjing and Xie, Shaoan and Jiao, Yang and Chen, Yetian and Guo, Yanhui and Shao, Simone and Gao, Yan and Chen, Guangyi and Zhang, Kun}, title = {Learning by Analogy: A Causal Framework for Compositional Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36615--36626} }
UniPart: Part-Level 3D Generation with Unified 3D Geom-Seg Latents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xufan and Wu, Yushuang and Guo, Xiaoyang and Ye, Chongjie and Zhou, Jiaqing and Hu, Tianlei and Han, Xiaoguang and Du, Dong}, title = {{UniPart}: Part-Level {3D} Generation with Unified {3D} Geom-Seg Latents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34227--34236} }
Language-Guided One-Step Diffusion Model for Nighttime Flare Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Aoxiang and Yu, Kailong and Xue, Minglong and Pan, Liyuan and He, Jinhong and Yan, Wenchao and Zhou, Mingliang and Wu, Yirui}, title = {Language-Guided One-Step Diffusion Model for Nighttime Flare Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38442--38452} }
Instance-level Visual Active Tracking with Occlusion-Aware Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Haowei and Zhou, Kai and Gao, Hao and Zhang, Shiteng and Hu, Jinwu and Wen, Xutao and Ye, Qixiang and Tan, Mingkui}, title = {Instance-level Visual Active Tracking with Occlusion-Aware Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42494--42504} }
GeoDiff4D: Geometry-Aware Diffusion for 4D Head Avatar Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Chao and Zhao, Xiaochen and Deng, Xiang and Sun, Jingxiang and Di, Donglin and Su, Zhuo and Liu, Yebin}, title = {{GeoDiff4D}: Geometry-Aware Diffusion for {4D} Head Avatar Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32485--32495} }
LLaVAShield: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Guolei and Peng, Qinzhi and Xu, Gan and Huang, Yao and Lu, Yuxuan and Shen, Yongjun}, title = {{LLaVAShield}: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30130--30140} }
Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Tsai_2026_CVPR, author = {Tsai, Shiang-Feng and Liao, Yuan-Hong and Jhang, Jin-Cheng and Qiao, Nan and Sun, Min}, title = {Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33922--33932} }
Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ce and He, Jinxi and He, Junyi and Sycara, Katia and Xie, Yaqi}, title = {Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41182--41192} }
PETAR: Localized Findings Generation with Mask-Aware Vision-Language Modeling for PET Automated Reporting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maqbool_2026_CVPR, author = {Maqbool, Danyal and Lee, Changhee and Huemann, Zachary and Church, Samuel D. and Larson, Matthew E. and Perlman, Scott B. and Romero, Tomas A. and Warner, Joshua D. and Lubner, Meghan and Tie, Xin and Merkow, Jameson and Hu, Junjie and Cho, Steve Y. and Bradshaw, Tyler J.}, title = {{PETAR}: Localized Findings Generation with Mask-Aware Vision-Language Modeling for {PET} Automated Reporting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42637--42648} }
Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haiwei and Li, Kemou and Li, Yuanman and Zhou, Jiantao}, title = {Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35483--35493} }
When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Mingrui and Wang, Fengzhi and Wei, Xin and Wang, Jun and Wang, Nannan and Gao, Xinbo}, title = {When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37715--37724} }
Routing on Demand: DSNet for Efficient Progressive Point Cloud Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Xiaoqian and Xiao, Dong and Li, Husen and Liu, Zheng and Chen, Renjie}, title = {Routing on Demand: {DSNet} for Efficient Progressive Point Cloud Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39111--39120} }
ElasticFormer: Detecting Objects in HRW Shots via Elastic Computing Vision Transformer-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wenxi and Huang, Jingchen and Lyu, Chenyang and Liu, Moran and Lin, Haozhe and Ding, Guiguang and Guo, Yuchen}, title = {{ElasticFormer}: Detecting Objects in {HRW} Shots via Elastic Computing Vision Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32735--32744} }
Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vyas_2026_CVPR, author = {Vyas, Apoorv and Chang, Heng-Jui and Yang, Cheng-Fu and Huang, Po-Yao and Gao, Luya and Richter, Julius and Chen, Sanyuan and Le, Matthew and Doll{\'a}r, Piotr and Feichtenhofer, Christoph and Lee, Ann and Hsu, Wei-Ning}, title = {Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30172--30182} }
NS-Diff: Fluid Navier-Stokes Guided Video Diffusion via Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Zijun and Peng, Yuxin}, title = {{NS-Diff}: Fluid {Navier-Stokes} Guided Video Diffusion via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43218--43227} }
EpiAgent: An Agent-Centric System for Ancient Inscription Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Shipeng and Chen, Ang and Nie, Na and Fang, Pengfei and Zhang, Min-Ling and Xue, Hui}, title = {{EpiAgent}: An Agent-Centric System for Ancient Inscription Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39304--39313} }
Diffusion-Based sRGB Real Noise Generation via Prompt-Driven Noise Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2026_CVPR, author = {Ko, Jaekyun and Kim, Dongjin and Lee, Soomin and Wang, Guanghui and Kim, Tae Hyun}, title = {Diffusion-Based {sRGB} Real Noise Generation via Prompt-Driven Noise Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35956--35966} }
DiGraphHal-Bench: Evaluating Multimodal Large Language Models on Complex Directed Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yixin and He, Zhao and Hou, Yuxin and Zhou, Changhua and Liu, Zihao and Wang, Peng and Lu, Chenglong and Zhang, Xu and Wang, Wei}, title = {{DiGraphHal-Bench}: Evaluating Multimodal Large Language Models on Complex Directed Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30885--30894} }
MERG3R: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Leo Kaixuan and Shaikh, Abdus and Liang, Ruofan and Wu, Zhijie and Guan, Yushi and Vijaykumar, Nandita}, title = {{MERG3R}: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28969--28978} }
Alert-CLIP: Abnormality-aware Latent-Enhanced Representation Tuning of CLIP for Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yiyan and Zhang, Menghao and Sun, Haifeng and Ren, Pengfei and Chu, Xianao and Xu, Chenye and Tan, Hong and Wang, Jinghan and Qi, Qi and Wang, Jingyu}, title = {{Alert-CLIP}: Abnormality-aware Latent-Enhanced Representation Tuning of {CLIP} for Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35545--35554} }
Dropping Anchor and Spherical Harmonics for Sparse-view Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Shuangkang and Shen, I-Chao and Zhang, Xuanyang and Wang, Zesheng and Wang, Yufeng and Ding, Wenrui and Yu, Gang and Igarashi, Takeo}, title = {Dropping Anchor and Spherical Harmonics for Sparse-view {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33312--33322} }
MICo-150K: A Comprehensive Dataset Advancing Multi-Image Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Xinyu and Cen, Kangrui and Wei, Hongyang and Guo, Zhen and Li, Bairui and Wang, Zeqing and Zhang, Jinrui and Zhang, Lei}, title = {{MICo-150K}: A Comprehensive Dataset Advancing Multi-Image Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29695--29706} }
MedTVT-R1: A Multimodal LLM Empowering Medical Reasoning and Diagnosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuting and Yuan, Kaishen and Lu, Hao and Yue, Yutao and Chen, Jintai and Wu, Kaishun}, title = {{MedTVT-R1}: A Multimodal {LLM} Empowering Medical Reasoning and Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35248--35259} }
Good Can Sometimes be Bad: A Unified Attack against 3D Point Cloud Classifier by a Flexible Isotropic Resampling-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Linkun and Zhang, Jiahao and Zhang, Juntao and Zhang, Lei and He, Fazhi and Han, Daojun}, title = {Good Can Sometimes be Bad: A Unified Attack against {3D} Point Cloud Classifier by a Flexible Isotropic Resampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42246--42256} }
CryoHype: Reconstructing a thousand cryo-EM structures with transformer-based hypernetworks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Jeffrey and Jeon, Minkyu and Ma, Ambri and Yeung-Levy, Serena and Zhong, Ellen D.}, title = {{CryoHype}: Reconstructing a thousand cryo-{EM} structures with transformer-based hypernetworks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35280--35290} }
Measuring the (Un)Faithfulness of Concept-Based Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Shubham and Ahuja, Narendra}, title = {Measuring the (Un)Faithfulness of Concept-Based Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38990--39000} }
Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Zekai and Du, Zongze and Zhu, Zhouhang and Zhong, Hao and Zhu, Muzhi and Wang, Wen and Xi, Yuling and Jing, Chenchen and Chen, Hao and Shen, Chunhua}, title = {Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40653--40663} }
BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Phuong-Anh and Pham, Tien Anh and Le, Duc-Trong and Nguyen, Cam-Van Thi}, title = {{BALM}: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30246--30256} }
PhysGM: Large Physical Gaussian Model for Feed-Forward 4D Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Chunji and Chen, Zequn and Di, Donglin and Zhang, Weinan and Li, Hao and Wei, Chen and Lei, Yinjie and Li, Changsheng}, title = {{PhysGM}: Large Physical {Gaussian} Model for Feed-Forward {4D} Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29855--29865} }
High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jia and Dai, Yijing and Cao, Tingfeng and Wu, Meiling and Luo, Tao and Zhang, Jian Dong and Lu, Guangming and Zeng, Xiaoyi}, title = {High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35821--35830} }
Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jingjing and Zhang, Lei and Fu, Zheren and Hu, Bo and Mao, Zhendong}, title = {Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33881--33890} }
Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Kunlun and Cheng, Haotong and Li, Jiangmeng and Zou, Xu and Zhou, Jiahuan}, title = {Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40397--40406} }
Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhilin and Wang, Yabin and Ma, Zhiheng and Song, Yaguang and Wang, Yaowei and Hong, Xiaopeng}, title = {Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32322--32333} }
PS-SR: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Aiqiu and Qiu, Zhaofan and Yao, Ting and Mei, Tao}, title = {{PS-SR}: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38218--38227} }
ARES: Unifying Asymmetric RGB-Event Stereo for Probabilistic Scene Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jie Long and Lee, Gim Hee}, title = {{ARES}: Unifying Asymmetric {RGB}-Event Stereo for Probabilistic Scene Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37022--37031} }
SelfHVD: Self-Supervised Handheld Video Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Honglei and Zhang, Zhilu and Fan, Junjie and Wu, Xiaohe and Zuo, Wangmeng}, title = {{SelfHVD}: Self-Supervised Handheld Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37486--37495} }
RFDM: Residual Flow Diffusion Models for Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Salehi_2026_CVPR, author = {Salehi, Mohammadreza and Noroozi, Mehdi and Morreale, Luca and Chavhan, Ruchika and Chadwick, Malcolm and Gil Couto Pimentel Ramos, Alberto and Mehrotra, Abhinav}, title = {{RFDM}: Residual Flow Diffusion Models for Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43514--43524} }
The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Wei and Dai, Yang and Huang, Jincai and Tao, Qing}, title = {The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42300--42309} }
Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive Mamba for Compressive Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Le and Gan, Hongping}, title = {Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive {Mamba} for Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41794--41803} }
OmniVGGT: Omni-Modality Driven Visual Geometry Grounded Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Haosong and Li, Hao and Dai, Yalun and Lan, Yushi and Luo, Yihang and Qi, Tianyu and Zhang, Zhengshen and Zhan, Yufeng and Zhang, Junfei and Xu, Wenchao and Liu, Ziwei}, title = {{OmniVGGT}: Omni-Modality Driven Visual Geometry Grounded Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36485--36497} }
Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zheng and Chen, Haoran and Qin, Haoxuan and Wei, Zhipeng and Qian, Tianwen and Bai, Cong}, title = {Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33784--33793} }
Towards Robust Sequential Decomposition for Complex Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Zilai and Cao, Mingdeng and Li, Zijie and Lian, Xiaochen and Shi, Yichun and Zhu, Peihao and Sun, Chen and Wang, Peng}, title = {Towards Robust Sequential Decomposition for Complex Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38101--38110} }
GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiahao and Wang, Zihan and Li, Xiangyang and Zhu, Xing and Shen, Yujun and Xu, Yinghao and Jiang, Shuqiang}, title = {{GA-VLN}: Geometry-Aware {BEV} Representation for Efficient Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40053--40063} }
Learning Explicit Continuous Motion Representation for Dynamic Gaussian Splatting from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuankai and Xiao, Junjin and Huang, Shangwei and Zheng, Wei-Shi and Zhang, Qing},
  title     = {Learning Explicit Continuous Motion Representation for Dynamic {Gaussian} Splatting from Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33291--33300}
}
Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Hao and Wu, Tiru and Jiang, Yan and Zhou, Wanqi and Hu, Junxing and Han, Ai},
  title     = {Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42331--42340}
}
MotionV2V: Editing Motion in a Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Burgert_2026_CVPR,
  author    = {Burgert, Ryan and Herrmann, Charles and Cole, Forrester and Ryoo, Michael S. and Wadhwa, Neal and Voynov, Andrey and Ruiz, Nataniel},
  title     = {{MotionV2V}: Editing Motion in a Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35988--35997}
}
Life-IQA: Boosting Blind Image Quality Assessment through GCN-enhanced Layer Interaction and MoE-based Feature Decoupling-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Long and Duan, Huiyu and Zheng, Guoquan and Zhang, Jianbo and Hao, Jie and Yuan, Liang},
  title     = {{Life-IQA}: Boosting Blind Image Quality Assessment through {GCN}-enhanced Layer Interaction and {MoE}-based Feature Decoupling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29866--29876}
}
CoT-Edit: Let CoT Guide Instruction Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Sen and Guan, Fengbin and Zhang, Youliang and Li, Xin and Chen, Zhibo},
  title     = {{CoT-Edit}: Let {CoT} Guide Instruction Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37960--37970}
}
Sparse Spectral LoRA: Routed Experts for Medical VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Nejatimanzari_2026_CVPR,
  author    = {Nejatimanzari, Omid and Asgariandehkordi, Hojat and Koleilat, Taha and Xiao, Yiming and Rivaz, Hassan},
  title     = {Sparse Spectral {LoRA}: Routed Experts for Medical {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35351--35362}
}
PointTPA: Dynamic Network Parameter Adaptation for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Siyuan and Zheng, Chaoqun and Zhou, Xin and Feng, Tianrui and Liang, Dingkang and Bai, Xiang},
  title     = {{PointTPA}: Dynamic Network Parameter Adaptation for {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36571--36581}
}
Can Natural Image Autoencoders Compactly Tokenize fMRI Volumes for Long-Range Dynamics Modeling?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Peter Yongho and Park, Juhyeon and Park, Jungwoo and Choi, Jubin and Seo, Jungwoo and Cha, Jiook and Moon, Taesup},
  title     = {Can Natural Image Autoencoders Compactly Tokenize {fMRI} Volumes for Long-Range Dynamics Modeling?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35321--35330}
}
Dexterous World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR_DexterousWorldModels,
  author    = {Kim, Byungjun and Kim, Taeksoo and Lee, Junyoung and Joo, Hanbyul},
  title     = {Dexterous World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29663--29673}
}
You Only Erase Once: Erasing Anything without Bringing Unexpected Content-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yixing and Zhang, Qing and Xu, Wenju and Zheng, Wei-Shi},
  title     = {You Only Erase Once: Erasing Anything without Bringing Unexpected Content},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43197--43207}
}
EditMGT: Unleashing Potentials of Masked Generative Transformers in Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chow_2026_CVPR,
  author    = {Chow, Wei and Li, Linfeng and Kong, Lingdong and Li, Zefeng and Xu, Qi and Song, Hang and Ye, Tian and Wang, Xian and Bai, Jinbin and Xu, Shilin and Li, Xiangtai and Pan, Junting and Liu, Shaoteng and Zhou, Ran and Yang, Tianshu and Liu, Songhua},
  title     = {{EditMGT}: Unleashing Potentials of Masked Generative Transformers in Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38038--38048}
}
Back

