CVPR 2026 Open Access Repository

Papers

Back
DirectFisheye-GS: Enabling Native Fisheye Input in Gaussian Splatting with Cross-View Joint Optimization: Zhengxian Yang,

Fei Xie,

Xutao Xue,

Rui Zhang,

Taicheng Huang,

Yang Liu,

Mengqi Ji,

Tao Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhengxian and Xie, Fei and Xue, Xutao and Zhang, Rui and Huang, Taicheng and Liu, Yang and Ji, Mengqi and Yu, Tao},
  title     = {{DirectFisheye-GS}: Enabling Native Fisheye Input in {Gaussian} Splatting with Cross-View Joint Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4942--4952}
}
CompBench: Benchmarking Complex Instruction-guided Image Editing: Bohan Jia,

Wenxuan Huang,

Yuntian Tang,

Junbo Qiao,

Jincheng Liao,

Shaosheng Cao,

Fei Zhao,

Zhaopeng Feng,

Zhouhong Gu,

Zhenfei Yin,

Lei Bai,

Wanli Ouyang,

Lin Chen,

Fei Zhao,

Zihan Wang,

Yuan Xie,

Shaohui Lin; [pdf] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Bohan and Huang, Wenxuan and Tang, Yuntian and Qiao, Junbo and Liao, Jincheng and Cao, Shaosheng and Zhao, Fei and Feng, Zhaopeng and Gu, Zhouhong and Yin, Zhenfei and Bai, Lei and Ouyang, Wanli and Chen, Lin and Zhao, Fei and Wang, Zihan and Xie, Yuan and Lin, Shaohui},
  title     = {{CompBench}: Benchmarking Complex Instruction-guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1112--1122}
}
Spk2VidNet: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams: Yuanlin Wang,

Ruiqin Xiong,

Jiyu Xie,

Zhenkun Zhu,

Zhaofei Yu,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanlin and Xiong, Ruiqin and Xie, Jiyu and Zhu, Zhenkun and Yu, Zhaofei and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{Spk2VidNet}: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12140--12149}
}
Continual Distillation of Teachers from Different Domains: Nicolas Michel,

Maorong Wang,

Jiangpeng He,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Michel_2026_CVPR,
  author    = {Michel, Nicolas and Wang, Maorong and He, Jiangpeng and Yamasaki, Toshihiko},
  title     = {Continual Distillation of Teachers from Different Domains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10810--10819}
}
GT-SVJ: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling: Shivanshu Shekhar,

Uttaran Bhattacharya,

Raghavendra Addanki,

Mehrab Tanjim,

Somdeb Sarkhel,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shekhar_2026_CVPR,
  author    = {Shekhar, Shivanshu and Bhattacharya, Uttaran and Addanki, Raghavendra and Tanjim, Mehrab and Sarkhel, Somdeb and Zhang, Tong},
  title     = {{GT-SVJ}: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9847--9858}
}
Beyond Euclidean Gossip: KL-Barycentric Consensus on Heterogeneous and Imbalanced Images: Lu Xu,

Guosheng Yin; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Lu and Yin, Guosheng},
  title     = {Beyond {Euclidean} Gossip: {KL}-Barycentric Consensus on Heterogeneous and Imbalanced Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6167--6175}
}
Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models: Wongi Jeong,

Hoigi Seo,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wongi and Seo, Hoigi and Chun, Se Young},
  title     = {Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4547--4557}
}
Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models: Abhishek Kumar Sinha,

Nitant Dube,

Soma Biswas; [pdf] [supp]
[bibtex]
@InProceedings{Sinha_2026_CVPR,
  author    = {Sinha, Abhishek Kumar and Dube, Nitant and Biswas, Soma},
  title     = {Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3856--3865}
}
AD-GBC: Anisotropic Granular-Ball Skip-Connection Refiner for UNet-Based Medical Image Segmentation: Xiya Shen,

Qinglin Zhao,

Li Feng; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Xiya and Zhao, Qinglin and Feng, Li},
  title     = {{AD-GBC}: Anisotropic Granular-Ball Skip-Connection Refiner for {UNet}-Based Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1418--1427}
}
White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation: Shuwei Li,

Lei Tan,

Robby T. Tan; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuwei and Tan, Lei and Tan, Robby T.},
  title     = {White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1331--1341}
}
Reallocating Attention Across Layers to Reduce Multimodal Hallucination: Haolang Lu,

Bolun Chu,

WeiYe Fu,

Guoshun Nan,

Junning Liu,

Minghui Pan,

Qiankun Li,

Yi Yu,

Hua Wang,

Kun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haolang and Chu, Bolun and Fu, WeiYe and Nan, Guoshun and Liu, Junning and Pan, Minghui and Li, Qiankun and Yu, Yi and Wang, Hua and Wang, Kun},
  title     = {Reallocating Attention Across Layers to Reduce Multimodal Hallucination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4157--4167}
}
PanoEnv: Exploring 3D Spatial Intelligence in Panoramic Environments with Reinforcement Learning: Zekai Lin,

Xu Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zekai and Zheng, Xu},
  title     = {{PanoEnv}: Exploring {3D} Spatial Intelligence in Panoramic Environments with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9647--9657}
}
Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models: Hoigi Seo,

Byung Hyun Lee,

Jaehyun Cho,

Sungjin Lim,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Hoigi and Lee, Byung Hyun and Cho, Jaehyun and Lim, Sungjin and Chun, Se Young},
  title     = {Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10262--10272}
}
REArtGS++: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar Gaussian Splatting: Di Wu,

Liu Liu,

Anran Huang,

Yuyan Liu,

Qiaojun Yu,

Shaofan Liu,

Liangtu Song,

Cewu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Di and Liu, Liu and Huang, Anran and Liu, Yuyan and Yu, Qiaojun and Liu, Shaofan and Song, Liangtu and Lu, Cewu},
  title     = {{REArtGS++}: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1177--1186}
}
Efficient and High-Fidelity Omni Modality Retrieval: Chuong Huynh,

Manh Luong,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2026_CVPR,
  author    = {Huynh, Chuong and Luong, Manh and Shrivastava, Abhinav},
  title     = {Efficient and High-Fidelity Omni Modality Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8770--8780}
}
What Is the Optimal Ranking Score Between Precision and Recall? We Can Always Find It and It Is Rarely F1: Sébastien Piérard,

Adrien Deliège,

Marc Van Droogenbroeck; [pdf] [supp]
[bibtex]
@InProceedings{Pierard_2026_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Deli{\`e}ge, Adrien and Van Droogenbroeck, Marc},
  title     = {What Is the Optimal Ranking Score Between Precision and Recall? {We} Can Always Find It and It Is Rarely {F1}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9722--9731}
}
RetouchIQ: MLLM Agents for Instruction-Based Image Retouching with Generalist Reward: Qiucheng Wu,

Jing Shi,

Simon Jenni,

Kushal Kafle,

Tianyu Wang,

Shiyu Chang,

Handong Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Qiucheng and Shi, Jing and Jenni, Simon and Kafle, Kushal and Wang, Tianyu and Chang, Shiyu and Zhao, Handong},
  title     = {{RetouchIQ}: {MLLM} Agents for Instruction-Based Image Retouching with Generalist Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12279--12288}
}
LongStream: Long-Sequence Streaming Autoregressive Visual Geometry: Chong Cheng,

Xianda Chen,

Tao Xie,

Wei Yin,

Weiqiang Ren,

Qian Zhang,

Xiaoyang Guo,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Chong and Chen, Xianda and Xie, Tao and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Guo, Xiaoyang and Wang, Hao},
  title     = {{LongStream}: Long-Sequence Streaming Autoregressive Visual Geometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {272--283}
}
Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances: Dominik Hollidt,

Tommaso Bendinelli,

Christian Holz; [pdf] [supp]
[bibtex]
@InProceedings{Hollidt_2026_CVPR,
  author    = {Hollidt, Dominik and Bendinelli, Tommaso and Holz, Christian},
  title     = {Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7036--7046}
}
APPO: Attention-guided Perception Policy Optimization for Video Reasoning: Henghui Du,

Chang Zhou,

Xi Chen,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Henghui and Zhou, Chang and Chen, Xi and Hu, Di},
  title     = {{APPO}: Attention-guided Perception Policy Optimization for Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12269--12278}
}
An Efficient Token Compression Framework for Visual Object Tracking: Weijing Wu,

Qihua Liang,

Bineng Zhong,

Haiying Xia,

Zhiyi Mo,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Weijing and Liang, Qihua and Zhong, Bineng and Xia, Haiying and Mo, Zhiyi and Song, Shuxiang},
  title     = {An Efficient Token Compression Framework for Visual Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6857--6867}
}
GraphVLM: Benchmarking Vision Language Models for Multimodal Graph Learning: Jiajin Liu,

Dongzhe Fan,

Chuanhao Ji,

Daochen Zha,

Qiaoyu Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiajin and Fan, Dongzhe and Ji, Chuanhao and Zha, Daochen and Tan, Qiaoyu},
  title     = {{GraphVLM}: Benchmarking Vision Language Models for Multimodal Graph Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9491--9500}
}
MovieRecapsQA: A Multimodal Open-Ended Video Question-Answering Benchmark: Shaden Shaar,

Bradon Thymes,

Sirawut Chaixanien,

Claire Cardie,

Bharath Hariharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaar_2026_CVPR,
  author    = {Shaar, Shaden and Thymes, Bradon and Chaixanien, Sirawut and Cardie, Claire and Hariharan, Bharath},
  title     = {{MovieRecapsQA}: A Multimodal Open-Ended Video Question-Answering Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4537--4546}
}
PhysHead: Simulation-Ready Gaussian Head Avatars: Berna Kabadayi,

Vanessa Sklyarova,

Wojciech Zielonka,

Justus Thies,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kabadayi_2026_CVPR,
  author    = {Kabadayi, Berna and Sklyarova, Vanessa and Zielonka, Wojciech and Thies, Justus and Pons-Moll, Gerard},
  title     = {{PhysHead}: Simulation-Ready {Gaussian} Head Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4109--4121}
}
Physical Simulator In-the-Loop Video Generation: Lin Geng Foo,

Mark He Huang,

Alexandros Lattas,

Stylianos Moschoglou,

Thabo Beeler,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Foo_2026_CVPR,
  author    = {Foo, Lin Geng and Huang, Mark He and Lattas, Alexandros and Moschoglou, Stylianos and Beeler, Thabo and Theobalt, Christian},
  title     = {Physical Simulator In-the-Loop Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4301--4311}
}
JoPPO: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning: Yifan Yang,

Juntuo Wang,

Yuming Qiao,

Xudong Zhang,

Chunyang Yu,

Yan Li,

Xiao Lin,

Liang Luo,

Dan Meng; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yifan and Wang, Juntuo and Qiao, Yuming and Zhang, Xudong and Yu, Chunyang and Li, Yan and Lin, Xiao and Luo, Liang and Meng, Dan},
  title     = {{JoPPO}: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11684--11693}
}
Functional Mean Flow in Hilbert Space: Zhiqi Li,

Yuchen Sun,

Greg Turk,

Bo Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiqi and Sun, Yuchen and Turk, Greg and Zhu, Bo},
  title     = {Functional Mean Flow in {Hilbert} Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1928--1938}
}
One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework: Lorenzo Bianchi,

Giacomo Pacini,

Fabio Carrara,

Nicola Messina,

Giuseppe Amato,

Fabrizio Falchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bianchi_2026_CVPR,
  author    = {Bianchi, Lorenzo and Pacini, Giacomo and Carrara, Fabio and Messina, Nicola and Amato, Giuseppe and Falchi, Fabrizio},
  title     = {One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5532--5542}
}
Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients: Ziwei Xiang,

Fanhu Zeng,

Hongjian Fang,

Rui-Qi Wang,

Renxing Chen,

Yanan Zhu,

Yi Chen,

Peipei Yang,

Xu-Yao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Ziwei and Zeng, Fanhu and Fang, Hongjian and Wang, Rui-Qi and Chen, Renxing and Zhu, Yanan and Chen, Yi and Yang, Peipei and Zhang, Xu-Yao},
  title     = {Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3500--3510}
}
GR-Gauge: Cost-efficient Training Configuration By Gauging the Gradient Redundancy: Guanjie Wang,

Chen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guanjie and Chen, Chen},
  title     = {{GR-Gauge}: Cost-efficient Training Configuration By Gauging the Gradient Redundancy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12934--12943}
}
SPEGC: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation: Xiaogang Du,

Jiawei Zhang,

Tongfei Liu,

Tao Lei,

Yingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Xiaogang and Zhang, Jiawei and Liu, Tongfei and Lei, Tao and Wang, Yingbo},
  title     = {{SPEGC}: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491}
}
Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness: Ziliang Chen,

Yulu Li,

Liangda Fang,

Jusheng Zhang,

Yongsen Zheng,

Quanlong Guan,

Xipeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Li, Yulu and Fang, Liangda and Zhang, Jusheng and Zheng, Yongsen and Guan, Quanlong and Chen, Xipeng},
  title     = {Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3091--3100}
}
The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment: Ziheng Ouyang,

Yiren Song,

Yaoli Liu,

Shihao Zhu,

Qibin Hou,

Ming-Ming Cheng,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Ziheng and Song, Yiren and Liu, Yaoli and Zhu, Shihao and Hou, Qibin and Cheng, Ming-Ming and Shou, Mike Zheng},
  title     = {The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2035--2046}
}
Where Does Vision Meet Language? Understanding and Refining Visual Fusion in MLLMs via Contrastive Attention: Shezheng Song,

Shasha Li,

Shan Zhao,

Xiaopeng Li,

Qian Wan,

Chengyu Wang,

Tianwei Yan,

Ma Jun,

Jie Yu; [pdf]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Shezheng and Li, Shasha and Zhao, Shan and Li, Xiaopeng and Wan, Qian and Wang, Chengyu and Yan, Tianwei and Jun, Ma and Yu, Jie},
  title     = {Where Does Vision Meet Language? {Understanding} and Refining Visual Fusion in {MLLMs} via Contrastive Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10051--10060}
}
Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning: Chubin Chen,

Sujie Hu,

Jiashu Zhu,

Meiqi Wu,

Jintao Chen,

Yanxun Li,

Nisha Huang,

Chengyu Fang,

Jiahong Wu,

Xiangxiang Chu,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chubin and Hu, Sujie and Zhu, Jiashu and Wu, Meiqi and Chen, Jintao and Li, Yanxun and Huang, Nisha and Fang, Chengyu and Wu, Jiahong and Chu, Xiangxiang and Li, Xiu},
  title     = {Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12775--12786}
}
DocPrune: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning: Joonmyung Choi,

Sanghyeok Lee,

Jongha Kim,

Sehyung Kim,

Dohwan Ko,

Jihyung Kil,

Hyunwoo J. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Kim, Jongha and Kim, Sehyung and Ko, Dohwan and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {{DocPrune}: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3543--3552}
}
POUR: A Provably Optimal Method for Unlearning Representation via Neural Collapse: Anjie Le,

Can Peng,

Yuyuan Liu,

J. Alison Noble; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Anjie and Peng, Can and Liu, Yuyuan and Noble, J. Alison},
  title     = {{POUR}: A Provably Optimal Method for Unlearning Representation via Neural Collapse},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10273--10282}
}
Skyra: AI-Generated Video Detection via Grounded Artifact Reasoning: Yifei Li,

Wenzhao Zheng,

Yanran Zhang,

Runze Sun,

Yu Zheng,

Lei Chen,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yifei and Zheng, Wenzhao and Zhang, Yanran and Sun, Runze and Zheng, Yu and Chen, Lei and Zhou, Jie and Lu, Jiwen},
  title     = {{Skyra}: {AI}-Generated Video Detection via Grounded Artifact Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4482--4493}
}
MAMMA: Markerless Accurate Multi-person Motion Acquisition: Hanz Cuevas Velasquez,

Anastasios Yiannakidis,

Soyong Shin,

Giorgio Becherini,

Markus Höschle,

Joachim Tesch,

Taylor Obersat,

Tsvetelina Alexiadis,

Eni Halilaj,

Michael J. Black; [pdf] [supp]
[bibtex]
@InProceedings{Velasquez_2026_CVPR,
  author    = {Velasquez, Hanz Cuevas and Yiannakidis, Anastasios and Shin, Soyong and Becherini, Giorgio and H{\"o}schle, Markus and Tesch, Joachim and Obersat, Taylor and Alexiadis, Tsvetelina and Halilaj, Eni and Black, Michael J.},
  title     = {{MAMMA}: Markerless Accurate Multi-person Motion Acquisition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7175--7186}
}
First Frame Is the Place to Go for Video Content Customization: Jingxi Chen,

Zongxia Li,

Zhichao Liu,

Guangyao Shi,

Xiyang Wu,

Fuxiao Liu,

Cornelia Fermüller,

Brandon Y. Feng,

Yiannis Aloimonos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingxi and Li, Zongxia and Liu, Zhichao and Shi, Guangyao and Wu, Xiyang and Liu, Fuxiao and Ferm{\"u}ller, Cornelia and Feng, Brandon Y. and Aloimonos, Yiannis},
  title     = {First Frame Is the Place to Go for Video Content Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9243--9252}
}
HeroGS: Hierarchical Guidance for Robust 3D Gaussian Splatting under Sparse Views: Jiashu Li,

Xumeng Han,

Zhaoyang Wei,

Zipeng Wang,

Kuiran Wang,

Guorong Li,

Zhenjun Han,

Jianbin Jiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiashu and Han, Xumeng and Wei, Zhaoyang and Wang, Zipeng and Wang, Kuiran and Li, Guorong and Han, Zhenjun and Jiao, Jianbin},
  title     = {{HeroGS}: Hierarchical Guidance for Robust {3D} {Gaussian} Splatting under Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11788--11797}
}
Bidirectional Query-Driven Generation of Parametric CAD Sketch: Yang Liu,

Daxuan Ren,

Yijie Ding,

Jianmin Zheng,

Fang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Ren, Daxuan and Ding, Yijie and Zheng, Jianmin and Deng, Fang},
  title     = {Bidirectional Query-Driven Generation of Parametric {CAD} Sketch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3176--3185}
}
SRGCD: Stability-Driven Region Growth Framework for 3D Change Detection: Yue Wu,

Tao Peng,

Yongzhe Yuan,

Kaiyuan Feng,

Hao Li,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yue and Peng, Tao and Yuan, Yongzhe and Feng, Kaiyuan and Li, Hao and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {{SRGCD}: Stability-Driven Region Growth Framework for {3D} Change Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7546--7555}
}
Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning: Fankang Xu,

Lu Jin,

Yanpeng Sun,

Shiyu Xuan,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao},
  title     = {Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10799--10809}
}
SAM 3D Body: Robust Full-Body Human Mesh Recovery: Xitong Yang,

Devansh Kukreja,

Don Pinkus,

Taosha Fan,

Jinhyung Park,

Soyong Shin,

Jinkun Cao,

Jia-Wei Liu,

Nicolás Ugrinovic,

Anushka Sagar,

Jitendra Malik,

Matt Feiszli,

Piotr Dollár,

Kris Kitani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xitong and Kukreja, Devansh and Pinkus, Don and Fan, Taosha and Park, Jinhyung and Shin, Soyong and Cao, Jinkun and Liu, Jia-Wei and Ugrinovic, Nicol{\'a}s and Sagar, Anushka and Malik, Jitendra and Feiszli, Matt and Doll{\'a}r, Piotr and Kitani, Kris},
  title     = {{SAM} {3D} Body: Robust Full-Body Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7209--7219}
}
GuardTrace-VL: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision: Yuxiao Xiang,

Junchi Chen,

Zhenchao Jin,

Changtao Miao,

Haojie Yuan,

Qi Chu,

Tao Gong,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Yuxiao and Chen, Junchi and Jin, Zhenchao and Miao, Changtao and Yuan, Haojie and Chu, Qi and Gong, Tao and Yu, Nenghai},
  title     = {{GuardTrace-VL}: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11912--11922}
}
Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining: Hao Zou,

Runqing Zhang,

Jin Ding,

Xue Zhou,

Jianxiao Zou,

Mingzhu Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Hao and Zhang, Runqing and Ding, Jin and Zhou, Xue and Zou, Jianxiao and Cai, Mingzhu},
  title     = {Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9689--9698}
}
Gastric-X: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis: Yuanzhe Li,

Hao Chen,

Rui Yin,

Juyan Ba,

Yu Zhang,

Sheng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuanzhe and Chen, Hao and Yin, Rui and Ba, Juyan and Zhang, Yu and Lu, Sheng},
  title     = {{Gastric-X}: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2490--2501}
}
AVION: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network: Yu Hu,

Jianyang Gu,

Hao Liu,

Yue Cao,

Jozsef Hamari,

Zheng Liu,

Mohsen Zardadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yu and Gu, Jianyang and Liu, Hao and Cao, Yue and Hamari, Jozsef and Liu, Zheng and Zardadi, Mohsen},
  title     = {{AVION}: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10106--10115}
}
OSA: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement: Rui Wang,

Huisi Wu,

Jing Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Rui and Wu, Huisi and Qin, Jing},
  title     = {{OSA}: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1428--1438}
}
VLM-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery: Wenhao Shen,

Hao Wang,

Wanqi Yin,

Fayao Liu,

Xulei Yang,

Chao Liang,

Zhongang Cai,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Wenhao and Wang, Hao and Yin, Wanqi and Liu, Fayao and Yang, Xulei and Liang, Chao and Cai, Zhongang and Lin, Guosheng},
  title     = {{VLM}-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13918--13929}
}
PhysIR-Splat: Physically Consistent Thermal Infrared Radiative Transfer in 3D Gaussian Splatting: Jingyuan Gao,

Yumeng Hu,

Fei Gao,

Mingjin Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jingyuan and Hu, Yumeng and Gao, Fei and Zhang, Mingjin},
  title     = {{PhysIR-Splat}: Physically Consistent Thermal Infrared Radiative Transfer in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11818--11828}
}
Generalized-CVO: Fast and Correspondence-Free Local Point Cloud Registration with Second Order Riemannian Optimization: Ray Zhang,

Marcus Greiff,

Thomas Lew,

John Subosits; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ray and Greiff, Marcus and Lew, Thomas and Subosits, John},
  title     = {{Generalized-CVO}: Fast and Correspondence-Free Local Point Cloud Registration with Second Order {Riemannian} Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2948--2958},
}
MultiAnimate: Pose-Guided Image Animation Made Extensible: Yingcheng Hu,

Haowen Gong,

Chuanguang Yang,

Zhulin An,

Yongjun Xu,

Songhua Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yingcheng and Gong, Haowen and Yang, Chuanguang and An, Zhulin and Xu, Yongjun and Liu, Songhua},
  title     = {{MultiAnimate}: Pose-Guided Image Animation Made Extensible},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9306--9316},
}
AnyLift: Scaling Motion Reconstruction from Internet Videos via 2D Diffusion: Hongjie Li,

Heng Yu,

Jiaman Li,

Hong-Xing Yu,

Ehsan Adeli,

C. Karen Liu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongjie and Yu, Heng and Li, Jiaman and Yu, Hong-Xing and Adeli, Ehsan and Liu, C. Karen and Wu, Jiajun},
  title     = {{AnyLift}: Scaling Motion Reconstruction from Internet Videos via {2D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13876--13886},
}
Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling: Euisoo Jung,

Byunghyun Kim,

Hyunjin Kim,

Seonghye Cho,

Jae-Gil Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Euisoo and Kim, Byunghyun and Kim, Hyunjin and Cho, Seonghye and Lee, Jae-Gil},
  title     = {Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9374--9383},
}
The Missing Point in Vision Transformers for Universal Image Segmentation: Sajjad Shahabodini,

Mobina Mansoori,

Farnoush Bayatmakou,

Jamshid Abouei,

Konstantinos Plataniotis,

Arash Mohammadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shahabodini_2026_CVPR,
  author    = {Shahabodini, Sajjad and Mansoori, Mobina and Bayatmakou, Farnoush and Abouei, Jamshid and Plataniotis, Konstantinos and Mohammadi, Arash},
  title     = {The Missing Point in Vision Transformers for Universal Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6314--6324},
}
AeroAgent: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design: Ye Liu,

Shouyi Liu,

Huiyu Yang,

Jianghang Gu,

Wenhao Fan,

Zhongxin Yang,

Ding Wang,

Simeng Chen,

Zirun Jiang,

Yuanwei Bin,

Shiyi Chen,

Yuntian Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ye and Liu, Shouyi and Yang, Huiyu and Gu, Jianghang and Fan, Wenhao and Yang, Zhongxin and Wang, Ding and Chen, Simeng and Jiang, Zirun and Bin, Yuanwei and Chen, Shiyi and Chen, Yuntian},
  title     = {{AeroAgent}: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11694--11703},
}
StreamAvatar: Streaming Diffusion Models for Real-Time Interactive Human Avatars: Zhiyao Sun,

Ziqiao Peng,

Yifeng Ma,

Yi Chen,

Zhengguang Zhou,

Zixiang Zhou,

Guozhen Zhang,

Youliang Zhang,

Yuan Zhou,

Qinglin Lu,

Yong-Jin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhiyao and Peng, Ziqiao and Ma, Yifeng and Chen, Yi and Zhou, Zhengguang and Zhou, Zixiang and Zhang, Guozhen and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Liu, Yong-Jin},
  title     = {{StreamAvatar}: Streaming Diffusion Models for Real-Time Interactive Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10887--10897},
}
Dynamics-Aware Preference Optimization for Vision-Language Models: Jusheng Zhang,

Kaitong Cai,

Jing Yang,

Jian Wang,

Keze Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Cai, Kaitong and Yang, Jing and Wang, Jian and Wang, Keze},
  title     = {Dynamics-Aware Preference Optimization for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11610--11620},
}
CLAY: Conditional Visual Similarity Modulation in Vision-Language Embedding Space: Sohwi Lim,

Lee Hyoseok,

Jungjoon Park,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Sohwi and Hyoseok, Lee and Park, Jungjoon and Oh, Tae-Hyun},
  title     = {{CLAY}: Conditional Visual Similarity Modulation in Vision-Language Embedding Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9679--9688},
}
The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations: Kushal Vyas,

Alper Kayabasi,

Daniel Kim,

Vishwanath Saragadam,

Ashok Veeraraghavan,

Guha Balakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vyas_2026_CVPR,
  author    = {Vyas, Kushal and Kayabasi, Alper and Kim, Daniel and Saragadam, Vishwanath and Veeraraghavan, Ashok and Balakrishnan, Guha},
  title     = {The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6116--6125},
}
SGDrive: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving: Jingyu Li,

Junjie Wu,

Dongnan Hu,

Xiangkai Huang,

Bin Sun,

Zhihui Hao,

Xianpeng Lang,

Xiatian Zhu,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jingyu and Wu, Junjie and Hu, Dongnan and Huang, Xiangkai and Sun, Bin and Hao, Zhihui and Lang, Xianpeng and Zhu, Xiatian and Zhang, Li},
  title     = {{SGDrive}: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4032--4042},
}
Are Image-to-Video Models Good Zero-Shot Image Editors?: Zechuan Zhang,

Zhenyuan Chen,

Zongxin Yang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zechuan and Chen, Zhenyuan and Yang, Zongxin and Yang, Yi},
  title     = {Are Image-to-Video Models Good Zero-Shot Image Editors?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2090--2103},
}
RT-Splatting: Joint Reflection-Transmission Modeling with Gaussian Splatting: Ji Shi,

Xianghua Ying,

Bowei Xing,

Ruohao Guo,

Wenzhen Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Ji and Ying, Xianghua and Xing, Bowei and Guo, Ruohao and Yue, Wenzhen},
  title     = {{RT-Splatting}: Joint Reflection-Transmission Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4870--4880},
}
From Observation to Action: Latent Action-based Primitive Segmentation for VLA Pre-training in Industrial Settings: Jiajie Zhang,

Sören Schwertfeger,

Alexander Kleiner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiajie and Schwertfeger, S{\"o}ren and Kleiner, Alexander},
  title     = {From Observation to Action: Latent Action-based Primitive Segmentation for {VLA} Pre-training in Industrial Settings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6750--6759},
}
Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation: Daniel Sungho Jung,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Daniel Sungho and Lee, Kyoung Mu},
  title     = {Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7058--7067},
}
One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation: Linghui Fu,

Yuhan Liu,

Hao Chen,

Zhen Yang,

Yongjian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Linghui and Liu, Yuhan and Chen, Hao and Yang, Zhen and Deng, Yongjian},
  title     = {One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2832--2842},
}
SEASON: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding: Chang-Hsun Wu,

Kai-Po Chang,

Yu-Yang Sheng,

Hung-Kai Chung,

Kuei-Chun Wang,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chang-Hsun and Chang, Kai-Po and Sheng, Yu-Yang and Chung, Hung-Kai and Wang, Kuei-Chun and Wang, Yu-Chiang Frank},
  title     = {{SEASON}: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11096--11105},
}
A Training-Free Style-Personalization via SVD-Based Feature Decomposition: Kyoungmin Lee,

Jihun Park,

Jongmin Gim,

Wonhyeok Choi,

Kyumin Hwang,

Jaeyeul Kim,

Sunghoon Im; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kyoungmin and Park, Jihun and Gim, Jongmin and Choi, Wonhyeok and Hwang, Kyumin and Kim, Jaeyeul and Im, Sunghoon},
  title     = {A Training-Free Style-Personalization via {SVD}-Based Feature Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {506--516},
}
Unlocking Token Rewards via Training-Free Reward Attribution: Sitong Wu,

Haoru Tan,

Bin Xia,

Xichen Zhang,

Jingyao Li,

Shaofeng Zhang,

Xiaojuan Qi,

Bei Yu,

Jiaya Jia; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Sitong and Tan, Haoru and Xia, Bin and Zhang, Xichen and Li, Jingyao and Zhang, Shaofeng and Qi, Xiaojuan and Yu, Bei and Jia, Jiaya},
  title     = {Unlocking Token Rewards via Training-Free Reward Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5082--5091},
}
FreeArtGS: Articulated Gaussian Splatting Under Free-moving Scenario: Hang Dai,

Hongwei Fan,

Han Zhang,

Duojin Wu,

Jiyao Zhang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Hang and Fan, Hongwei and Zhang, Han and Wu, Duojin and Zhang, Jiyao and Dong, Hao},
  title     = {{FreeArtGS}: Articulated {Gaussian} Splatting Under Free-moving Scenario},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11777--11787},
}
HyperNAS: Enhancing Architecture Representation for NAS Predictor via Hypernetwork: Jindi Lv,

Yuhao Zhou,

Yuxin Tian,

Qing Ye,

Wentao Feng,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Jindi and Zhou, Yuhao and Tian, Yuxin and Ye, Qing and Feng, Wentao and Lv, Jiancheng},
  title     = {{HyperNAS}: Enhancing Architecture Representation for {NAS} Predictor via Hypernetwork},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12955--12965},
}
Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning: Wentao Huang,

Weimin Lyu,

Peiliang Lou,

Qingqiao Hu,

Xiaoling Hu,

Shahira Abousamra,

Wenchao Han,

Ruifeng Guo,

Jiawei Zhou,

Chao Chen,

Chen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wentao and Lyu, Weimin and Lou, Peiliang and Hu, Qingqiao and Hu, Xiaoling and Abousamra, Shahira and Han, Wenchao and Guo, Ruifeng and Zhou, Jiawei and Chen, Chao and Wang, Chen},
  title     = {Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6972--6981},
}
Does YOLO Really Need to See Every Training Image in Every Epoch?: Xingxing Xie,

Jiahua Dong,

Junwei Han,

Gong Cheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Xingxing and Dong, Jiahua and Han, Junwei and Cheng, Gong},
  title     = {Does {YOLO} Really Need to See Every Training Image in Every Epoch?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {135--144},
}
Learning What Helps: Task-Aligned Context Selection for Vision Tasks: Jingyu Guo,

Emir Konuk,

Fredrik Strand,

Christos Matsoukas,

Kevin Smith; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Jingyu and Konuk, Emir and Strand, Fredrik and Matsoukas, Christos and Smith, Kevin},
  title     = {Learning What Helps: Task-Aligned Context Selection for Vision Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11632--11642},
}
Scaling Spatial Intelligence with Multimodal Foundation Models: Zhongang Cai,

Ruisi Wang,

Chenyang Gu,

Fanyi Pu,

Junxiang Xu,

Yubo Wang,

Wanqi Yin,

Zhitao Yang,

Chen Wei,

Tongxi Zhou,

Qingping Sun,

Hui En Pang,

Jiaqi Li,

Oscar Qian,

Zhiqian Lin,

Xuanke Shi,

Kewang Deng,

Xiaoyang Han,

Zukai Chen,

Xiangyu Fan,

Hanming Deng,

Lewei Lu,

Liang Pan,

Bo Li,

Ziwei Liu,

Quan Wang,

Dahua Lin,

Lei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Zhongang and Wang, Ruisi and Gu, Chenyang and Pu, Fanyi and Xu, Junxiang and Wang, Yubo and Yin, Wanqi and Yang, Zhitao and Wei, Chen and Zhou, Tongxi and Sun, Qingping and Pang, Hui En and Li, Jiaqi and Qian, Oscar and Lin, Zhiqian and Shi, Xuanke and Deng, Kewang and Han, Xiaoyang and Chen, Zukai and Fan, Xiangyu and Deng, Hanming and Lu, Lewei and Pan, Liang and Li, Bo and Liu, Ziwei and Wang, Quan and Lin, Dahua and Yang, Lei},
  title     = {Scaling Spatial Intelligence with Multimodal Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7879--7890},
}
MonoSAOD: Monocular 3D Object Detection with Sparsely Annotated Label: Junyoung Jung,

Seokwon Kim,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Junyoung and Kim, Seokwon and Kim, Jung Uk},
  title     = {{MonoSAOD}: Monocular {3D} Object Detection with Sparsely Annotated Label},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4718--4727},
}
Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation: Jiahao Li,

Yang Lu,

Yachao Zhang,

Fangyong Wang,

Yuan Xie,

Yanyun Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiahao and Lu, Yang and Zhang, Yachao and Wang, Fangyong and Xie, Yuan and Qu, Yanyun},
  title     = {Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13168--13178},
}
CrossHOI: Learning Cross-View Representations for Monocular 3D Human-Object Interaction Reconstruction: Pei Geng,

Shanshan Zhang,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Pei and Zhang, Shanshan and Yang, Jian},
  title     = {{CrossHOI}: Learning Cross-View Representations for Monocular {3D} Human-Object Interaction Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7121--7130},
}
ANTS: Adaptive Negative Textual Space Shaping for OOD Detection via Test-Time MLLM Understanding and Reasoning: Wenjie Zhu,

Yabin Zhang,

Xin Jin,

Wenjun Zeng,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Wenjie and Zhang, Yabin and Jin, Xin and Zeng, Wenjun and Zhang, Lei},
  title     = {{ANTS}: Adaptive Negative Textual Space Shaping for {OOD} Detection via Test-Time {MLLM} Understanding and Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20--30},
}
Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation: Junghwan Park,

Woojin Cho,

Junhyuk Heo,

Darongsae Kwon,

Kookjin Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Junghwan and Cho, Woojin and Heo, Junhyuk and Kwon, Darongsae and Lee, Kookjin},
  title     = {Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {860--870},
}
GauMVC: Generative Decoupled Gaussian Representation for Human-centric Multi-view Video Compression: Ruoke Yan,

Mingjia Yang,

Xinfeng Zhang,

Haocheng Tang,

Qian Yin,

Zhipin Deng,

Kai Zhang,

Li Zhang,

Siwei Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Ruoke and Yang, Mingjia and Zhang, Xinfeng and Tang, Haocheng and Yin, Qian and Deng, Zhipin and Zhang, Kai and Zhang, Li and Ma, Siwei},
  title     = {{GauMVC}: Generative Decoupled {Gaussian} Representation for Human-centric Multi-view Video Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4963--4972},
}
Rethinking Occlusion Modeling for UAV Tracking: Jian Zhang,

Xincheng Yu,

Yi Lin; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jian and Yu, Xincheng and Lin, Yi},
  title     = {Rethinking Occlusion Modeling for {UAV} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13563--13573},
}
LRDUN: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging: He Huang,

Yujun Guo,

Wei He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, He and Guo, Yujun and He, Wei},
  title     = {{LRDUN}: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10556--10566},
}
Exploring Spatial Intelligence from a Generative Perspective: Muzhi Zhu,

Shunyao Jiang,

Huanyi Zheng,

Zekai Luo,

Hao Zhong,

Anzhou Li,

Kaijun Wang,

Jintao Rong,

Yang Liu,

Hao Chen,

Tao Lin,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Muzhi and Jiang, Shunyao and Zheng, Huanyi and Luo, Zekai and Zhong, Hao and Li, Anzhou and Wang, Kaijun and Rong, Jintao and Liu, Yang and Chen, Hao and Lin, Tao and Shen, Chunhua},
  title     = {Exploring Spatial Intelligence from a Generative Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2582--2592},
}
The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models: Runhao Mao,

Hanshi Wang,

Yixiang Yang,

Qianli Ma,

Jingmeng Zhou,

Zhipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Runhao and Wang, Hanshi and Yang, Yixiang and Ma, Qianli and Zhou, Jingmeng and Zhang, Zhipeng},
  title     = {The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10621--10631},
}
CLP: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation: Sungyong Park,

Sooyoung Choi,

Hyunsuh Koh,

Youngjae Choi,

Heewon Kim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Sungyong and Choi, Sooyoung and Koh, Hyunsuh and Choi, Youngjae and Kim, Heewon},
  title     = {{CLP}: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3794--3804},
}
InstAP: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding: Ashutosh Kumar,

Rajat Saini,

Jingjing Pan,

Mustafa Erdogan,

Mingfang Zhang,

Betty Le Dem,

Norimasa Kobori,

Quan Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashutosh and Saini, Rajat and Pan, Jingjing and Erdogan, Mustafa and Zhang, Mingfang and Le Dem, Betty and Kobori, Norimasa and Kong, Quan},
  title     = {{InstAP}: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3079--3090},
}
WaDi: Weight Direction-aware Distillation for One-step Image Synthesis: Lei Wang,

Yang Cheng,

Senmao Li,

Ge Wu,

Yaxing Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lei and Cheng, Yang and Li, Senmao and Wu, Ge and Wang, Yaxing and Yang, Jian},
  title     = {{WaDi}: Weight Direction-aware Distillation for One-step Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5574--5584},
}
Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction: Dongyue Wang,

Yang Lu,

Jiandong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Dongyue and Lu, Yang and Tian, Jiandong},
  title     = {Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5680--5689},
}
PPISP: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction: Isaac Deutsch,

Nicolas Moënne-Loccoz,

Gavriel State,

Zan Gojcic; [pdf] [supp]
[bibtex]
@InProceedings{Deutsch_2026_CVPR,
  author    = {Deutsch, Isaac and Mo{\"e}nne-Loccoz, Nicolas and State, Gavriel and Gojcic, Zan},
  title     = {{PPISP}: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7289--7298},
}
MiniCPM-V 4.5: Cooking Efficient MLLMs via Architecture, Data, and Training Recipe: Tianyu Yu,

Zefan Wang,

Chongyi Wang,

Fuwei Huang,

Wenshuo Ma,

Zhihui He,

Tianchi Cai,

Weize Chen,

Yuxiang Huang,

Ranchi Zhao,

Bokai Xu,

Junbo Cui,

Yingjing Xu,

Liqing Ruan,

Luoyuan Zhang,

Hanyu Liu,

Jingkun Tang,

Hongyuan Liu,

Qining Guo,

Wenhao Hu,

Bingxiang He,

Jie Zhou,

Jie Cai,

Ji Qi,

Zonghao Guo,

Chi Chen,

Guoyang Zeng,

Yuxuan Li,

Ganqu Cui,

Ning Ding,

Xu Han,

Yuan Yao,

Zhiyuan Liu,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Tianyu and Wang, Zefan and Wang, Chongyi and Huang, Fuwei and Ma, Wenshuo and He, Zhihui and Cai, Tianchi and Chen, Weize and Huang, Yuxiang and Zhao, Ranchi and Xu, Bokai and Cui, Junbo and Xu, Yingjing and Ruan, Liqing and Zhang, Luoyuan and Liu, Hanyu and Tang, Jingkun and Liu, Hongyuan and Guo, Qining and Hu, Wenhao and He, Bingxiang and Zhou, Jie and Cai, Jie and Qi, Ji and Guo, Zonghao and Chen, Chi and Zeng, Guoyang and Li, Yuxuan and Cui, Ganqu and Ding, Ning and Han, Xu and Yao, Yuan and Liu, Zhiyuan and Sun, Maosong},
  title     = {{MiniCPM-V} 4.5: Cooking Efficient {MLLMs} via Architecture, Data, and Training Recipe},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11704--11715},
}
Multi-level Causal LLM-based Text-to-Motion Generation with Human Alignment: Xiaodong Chen,

Qian Bao,

Xudong Liu,

Jianping Fang,

Jintao Fang,

Yongdong Zhang,

Tao Mei,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiaodong and Bao, Qian and Liu, Xudong and Fang, Jianping and Fang, Jintao and Zhang, Yongdong and Mei, Tao and Liu, Wu},
  title     = {Multi-level Causal {LLM}-based Text-to-Motion Generation with Human Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9342--9351},
}
ForeHOI: Feed-forward 3D Object Reconstruction from Daily Hand-Object Interaction Videos: Yuantao Chen,

Jiahao Chang,

Chongjie Ye,

Chaoran Zhang,

Zhaojie Fang,

Chenghong Li,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuantao and Chang, Jiahao and Ye, Chongjie and Zhang, Chaoran and Fang, Zhaojie and Li, Chenghong and Han, Xiaoguang},
  title     = {{ForeHOI}: Feed-forward {3D} Object Reconstruction from Daily Hand-Object Interaction Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8868--8879},
}
Learning Generalizable 3D Medical Image Representations from Mask-Guided Self-Supervision: Yunhe Gao,

Yabin Zhang,

Chong Wang,

Jiaming Liu,

Maya Varma,

Jean-Benoit Delbrouck,

Akshay Chaudhari,

Curtis Langlotz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yunhe and Zhang, Yabin and Wang, Chong and Liu, Jiaming and Varma, Maya and Delbrouck, Jean-Benoit and Chaudhari, Akshay and Langlotz, Curtis},
  title     = {Learning Generalizable {3D} Medical Image Representations from Mask-Guided Self-Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13744--13754},
}
Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only UAV Navigation: Kejia Liu,

Haoyang Zhou,

Ruoyu Xu,

Peicheng Wang,

Mingli Song,

Haofei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Kejia and Zhou, Haoyang and Xu, Ruoyu and Wang, Peicheng and Song, Mingli and Zhang, Haofei},
  title     = {Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only {UAV} Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5359--5368},
}
TRM-VLA: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models: Xiang Li,

Ya-Li Li,

Yuan Wang,

Shengjin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiang and Li, Ya-Li and Wang, Yuan and Wang, Shengjin},
  title     = {{TRM-VLA}: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10943--10953},
}
Make it SING: Analyzing Semantic Invariants in Classifiers: Harel Yadid,

Meir Yossef Levi,

Roy Betser,

Guy Gilboa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadid_2026_CVPR,
  author    = {Yadid, Harel and Levi, Meir Yossef and Betser, Roy and Gilboa, Guy},
  title     = {Make it {SING}: Analyzing Semantic Invariants in Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9911--9920},
}
PRISM: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition: Anni Yu,

Yu-Bin Yang; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Anni and Yang, Yu-Bin},
  title     = {{PRISM}: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2853--2863},
}
Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus: Xijie Xiang,

Lin Zhu,

Wei Zhang,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Xijie and Zhu, Lin and Zhang, Wei and Tian, Yonghong},
  title     = {Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {936--945},
}
BiMotion: B-spline Motion for Text-guided Dynamic 3D Character Generation: Miaowei Wang,

Qingxuan Yan,

Zhi Cao,

Yayuan Li,

Oisin Mac Aodha,

Jason J Corso,

Amir Vaxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Miaowei and Yan, Qingxuan and Cao, Zhi and Li, Yayuan and Mac Aodha, Oisin and Corso, Jason J and Vaxman, Amir},
  title     = {{BiMotion}: B-spline Motion for Text-guided Dynamic {3D} Character Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10152--10164},
}
JRM: Joint Reconstruction Model for Multiple Objects without Alignment: Qirui Wu,

Yawar Siddiqui,

Duncan Frost,

Samir Aroudj,

Armen Avetisyan,

Richard Newcombe,

Angel X. Chang,

Jakob Engel,

Henry Howard-Jenkins; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Qirui and Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Newcombe, Richard and Chang, Angel X. and Engel, Jakob and Howard-Jenkins, Henry},
  title     = {{JRM}: Joint Reconstruction Model for Multiple Objects without Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {307--316},
}
CAPT: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment: Maoyuan Shao,

Yutong Gao,

Xinyang Huang,

Lijuan Sun,

Guoshun Nan,

Chuang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Maoyuan and Gao, Yutong and Huang, Xinyang and Sun, Lijuan and Nan, Guoshun and Zhu, Chuang},
  title     = {{CAPT}: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3154--3164},
}
EG-3DVG: Expression and Geometry Aware Grounding Decoder for 3D Visual Grounding: GwangWook Park,

Hyo-Jun Lee,

Jong-Hyeon Baek,

Hanul Kim,

Yeong Jun Koh; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, GwangWook and Lee, Hyo-Jun and Baek, Jong-Hyeon and Kim, Hanul and Koh, Yeong Jun},
  title     = {{EG-3DVG}: Expression and Geometry Aware Grounding Decoder for {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2625--2634},
}
LLaDA-V: Large Language Diffusion Models with Visual Instruction Tuning: Zebin You,

Shen Nie,

Xiaolu Zhang,

Jun Zhou,

Zhiwu Lu,

Ji-Rong Wen,

Chongxuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Zebin and Nie, Shen and Zhang, Xiaolu and Zhou, Jun and Lu, Zhiwu and Wen, Ji-Rong and Li, Chongxuan},
  title     = {{LLaDA-V}: Large Language Diffusion Models with Visual Instruction Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10093--10105},
}
C-GenReg: Training-Free 3D Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion: Yuval Haitman,

Amit Efraim,

Joseph M. Francos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haitman_2026_CVPR,
  author    = {Haitman, Yuval and Efraim, Amit and Francos, Joseph M.},
  title     = {{C-GenReg}: Training-Free {3D} Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3004--3013},
}
Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos: Jialun Pei,

Zhangjun Zhou,

Diandian Guo,

Zhixi Li,

Jing Qin,

Bo Du,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Jialun and Zhou, Zhangjun and Guo, Diandian and Li, Zhixi and Qin, Jing and Du, Bo and Heng, Pheng-Ann},
  title     = {Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1396--1405},
}
CoIn: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models: Chenxi Du,

Yongheng Deng,

Jiani Liu,

Yujia Zhang,

Xi Chen,

Ju Ren; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Chenxi and Deng, Yongheng and Liu, Jiani and Zhang, Yujia and Chen, Xi and Ren, Ju},
  title     = {{CoIn}: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10492--10501},
}
EVA: Efficient Reinforcement Learning for End-to-End Video Agent: Yaolun Zhang,

Ruohui Wang,

Jiahao Wang,

Yepeng Tang,

Xuanyu Zheng,

Haonan Duan,

Hao Lu,

Hanming Deng,

Lewei Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yaolun and Wang, Ruohui and Wang, Jiahao and Tang, Yepeng and Zheng, Xuanyu and Duan, Haonan and Lu, Hao and Deng, Hanming and Lu, Lewei},
  title     = {{EVA}: Efficient Reinforcement Learning for End-to-End Video Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12289--12299},
}
When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models: Junrong Lian,

Weijian Deng,

Pengxu Wei,

Yaqin Chen,

Qixiang Ye,

Liang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Junrong and Deng, Weijian and Wei, Pengxu and Chen, Yaqin and Ye, Qixiang and Lin, Liang},
  title     = {When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9445--9454},
}
MeshFlow: Efficient Artistic Mesh Generation via MeshVAE and Flow-based Diffusion Transformer: Weiyu Li,

Antoine Toisoul,

Tom Monnier,

Roman Shapovalov,

Rakesh Ranjan,

Ping Tan,

Andrea Vedaldi; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiyu and Toisoul, Antoine and Monnier, Tom and Shapovalov, Roman and Ranjan, Rakesh and Tan, Ping and Vedaldi, Andrea},
  title     = {{MeshFlow}: Efficient Artistic Mesh Generation via {MeshVAE} and Flow-based Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5849--5858},
}
CrackSSM: Reviving SSMs for Crack Segmentation via Dynamic Scanning: Yubin Gu,

Boyang Hou,

Yuan Meng,

Wenting Luo,

Jiayi Ji,

Xiaoshuai Sun; [pdf]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Yubin and Hou, Boyang and Meng, Yuan and Luo, Wenting and Ji, Jiayi and Sun, Xiaoshuai},
  title     = {{CrackSSM}: Reviving {SSMs} for Crack Segmentation via Dynamic Scanning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10721--10730},
}
Rectifying Latent Space for Generative Single-Image Reflection Removal: Mingjia Li,

Jin Hu,

Hainuo Wang,

Qiming Hu,

Jiarui Wang,

Xiaojie Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingjia and Hu, Jin and Wang, Hainuo and Hu, Qiming and Wang, Jiarui and Guo, Xiaojie},
  title     = {Rectifying Latent Space for Generative Single-Image Reflection Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8397--8407},
}
Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination: Songyuan Yang,

Guijian Tang,

Kun Hu,

Haotian Wang,

Shixuan Liu,

Wenjing Yang,

Long Lan,

Huibin Tan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Songyuan and Tang, Guijian and Hu, Kun and Wang, Haotian and Liu, Shixuan and Yang, Wenjing and Lan, Long and Tan, Huibin},
  title     = {Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4188--4198},
}
Refracting Reality: Generating Images with Realistic Transparent Objects: Yue Yin,

Enze Tao,

Dylan Campbell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Yue and Tao, Enze and Campbell, Dylan},
  title     = {Refracting Reality: Generating Images with Realistic Transparent Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4312--4321},
}
Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning: Ziyi Zhang,

Li Shen,

Deheng Ye,

Yong Luo,

Huangxuan Zhao,

Meng Liu,

Wei Yu,

Lefei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ziyi and Shen, Li and Ye, Deheng and Luo, Yong and Zhao, Huangxuan and Liu, Meng and Yu, Wei and Zhang, Lefei},
  title     = {Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2401--2411},
}
Tri-Modal Fusion Transformers for UAV-based Object Detection: Craig Iaboni,

Pramod Abichandani; [pdf] [arXiv]
[bibtex]
@InProceedings{Iaboni_2026_CVPR,
  author    = {Iaboni, Craig and Abichandani, Pramod},
  title     = {Tri-Modal Fusion Transformers for {UAV-based} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4373--4382},
}
RetFormer: Multimodal Retrieval for Enhancing Image Recognition: Tianrui Yu,

Xiubo Liang,

Hongzhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Tianrui and Liang, Xiubo and Wang, Hongzhi},
  title     = {{RetFormer}: Multimodal Retrieval for Enhancing Image Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2704--2714},
}
Twin-T & TwintVQA: A Reliable Structure-Detail Separating VLM and a Comprehensive Benchmark for Chart and Table Tasks: Jiahua Bao,

Siyao Cheng,

Jiaxing Du,

Qingtao Xia,

Changjiang He,

Zeming Lang,

Jie Liu; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Jiahua and Cheng, Siyao and Du, Jiaxing and Xia, Qingtao and He, Changjiang and Lang, Zeming and Liu, Jie},
  title     = {{Twin-T} \& {TwintVQA}: A Reliable Structure-Detail Separating {VLM} and a Comprehensive Benchmark for Chart and Table Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4850--4859},
}
HTNav: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation: Chengjie Fan,

Cong Pan,

Zijian Liu,

Ningzhong Liu,

Jie Qin; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Chengjie and Pan, Cong and Liu, Zijian and Liu, Ningzhong and Qin, Jie},
  title     = {{HTNav}: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10976--10985},
}
NanoSD: Edge Efficient Foundation Model for Real Time Image Restoration: Subhajit Sanyal,

Srinivas Soumitri Miriyala,

Akshay Janardan Bankar,

Manjunath Arveti,

Sowmya Vajrala,

Shreyas Pandith,

Sravanth Kodavanti,

Abhishek Ameta,

Harshit Harshit,

Amit Satish Unde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sanyal_2026_CVPR,
  author    = {Sanyal, Subhajit and Miriyala, Srinivas Soumitri and Bankar, Akshay Janardan and Arveti, Manjunath and Vajrala, Sowmya and Pandith, Shreyas and Kodavanti, Sravanth and Ameta, Abhishek and Harshit, Harshit and Unde, Amit Satish},
  title     = {{NanoSD}: Edge Efficient Foundation Model for Real Time Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8449--8459},
}
Teacher-Guided Routing for Sparse Vision Mixture-of-Experts: Masahiro Kada,

Ryota Yoshihashi,

Satoshi Ikehata,

Rei Kawakami,

Ikuro Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kada_2026_CVPR,
  author    = {Kada, Masahiro and Yoshihashi, Ryota and Ikehata, Satoshi and Kawakami, Rei and Sato, Ikuro},
  title     = {Teacher-Guided Routing for Sparse Vision Mixture-of-Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6198--6208},
}
AstraNav-Memory: Contexts Compression for Long Memory: Junjun Hu,

Xinda Xue,

Botao Ren,

Minghua Luo,

Jintao Chen,

Haochen Bai,

Liangliang You,

Mu Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Junjun and Xue, Xinda and Ren, Botao and Luo, Minghua and Chen, Jintao and Bai, Haochen and You, Liangliang and Xu, Mu},
  title     = {{AstraNav-Memory}: Contexts Compression for Long Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8097--8109},
}
Globally Optimal Pose from Orthographic Silhouettes: Agniva Sengupta,

Dilara Kus,

Jianning Li,

Stefan Zachow; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sengupta_2026_CVPR,
  author    = {Sengupta, Agniva and Kus, Dilara and Li, Jianning and Zachow, Stefan},
  title     = {Globally Optimal Pose from Orthographic Silhouettes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11029--11038},
}
Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression: Minh-Duong Nguyen,

Senura Wanasekara,

Le-Tuan Nguyen,

Quoc-Viet Pham,

Ken-Tye Yong,

Nguyen H. Tran,

Dung D. Le; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Minh-Duong and Wanasekara, Senura and Nguyen, Le-Tuan and Pham, Quoc-Viet and Yong, Ken-Tye and Tran, Nguyen H. and Le, Dung D.},
  title     = {Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3347--3357},
}
Mind the Gap: Transferring Labels to Align Object Detection Datasets: Mikhail Kennerley,

Angelica I. Aviles-Rivero,

Carola-Bibiane Schönlieb,

Robby T. Tan; [pdf] [supp]
[bibtex]
@InProceedings{Kennerley_2026_CVPR,
  author    = {Kennerley, Mikhail and Aviles-Rivero, Angelica I. and Sch{\"o}nlieb, Carola-Bibiane and Tan, Robby T.},
  title     = {Mind the Gap: Transferring Labels to Align Object Detection Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4353--4362},
}
Decoupled Generative Modeling for Human-Object Interaction Synthesis: Hwanhee Jung,

Seunggwan Lee,

Jeongyoon Yoon,

SeungHyeon Kim,

Giljoo Nam,

Qixing Huang,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hwanhee and Lee, Seunggwan and Yoon, Jeongyoon and Kim, SeungHyeon and Nam, Giljoo and Huang, Qixing and Kim, Sangpil},
  title     = {Decoupled Generative Modeling for Human-Object Interaction Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2253--2263},
}
SpatialReward: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation: Sashuai Zhou,

Qiang Zhou,

Junpeng Ma,

Yue Cao,

Ruofan Hu,

Ziang Zhang,

Xiaoda Yang,

Zhibin Wang,

Jun Song,

Cheng Yu,

Bo Zheng,

Zhou Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Sashuai and Zhou, Qiang and Ma, Junpeng and Cao, Yue and Hu, Ruofan and Zhang, Ziang and Yang, Xiaoda and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Zhao, Zhou},
  title     = {{SpatialReward}: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {647--658},
}
Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction: Yisheng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yisheng},
  title     = {Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4145--4156},
}
Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans: Sizhong Qin,

Ramon Elias Weber,

Xinzheng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Sizhong and Weber, Ramon Elias and Lu, Xinzheng},
  title     = {Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10430--10440},
}
RetimeGS: Continuous-Time Reconstruction of 4D Gaussian Splatting: Xuezhen Wang,

Li Ma,

Yulin Shen,

Zeyu Wang,

Pedro V. Sander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuezhen and Ma, Li and Shen, Yulin and Wang, Zeyu and Sander, Pedro V.},
  title     = {{RetimeGS}: Continuous-Time Reconstruction of {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7340--7350},
}
Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers: Yifan Zhou,

Zeqi Xiao,

Tianyi Wei,

Shuai Yang,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yifan and Xiao, Zeqi and Wei, Tianyi and Yang, Shuai and Pan, Xingang},
  title     = {Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9424--9433},
}
Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views: Zhangquan Chen,

Manyuan Zhang,

Xinlei Yu,

Xufang Luo,

Mingze Sun,

Zihao Pan,

Xiang An,

Yan Feng,

Peng Pei,

Xunliang Cai,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhangquan and Zhang, Manyuan and Yu, Xinlei and Luo, Xufang and Sun, Mingze and Pan, Zihao and An, Xiang and Feng, Yan and Pei, Peng and Cai, Xunliang and Huang, Ruqi},
  title     = {Think with {3D}: Geometric Imagination Grounded Spatial Reasoning from Limited Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2613--2624},
}
Neural Field-Based 3D Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy: Shuo Chen,

Yijin Li,

Xi Zheng,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shuo and Li, Yijin and Zheng, Xi and Zhang, Guofeng},
  title     = {Neural Field-Based {3D} Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7266--7277},
}
RAVEN: Erasing Invisible Watermarks via Novel View Synthesis: Fahad Shamshad,

Nils Lukas,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shamshad_2026_CVPR,
  author    = {Shamshad, Fahad and Lukas, Nils and Nandakumar, Karthik},
  title     = {{RAVEN}: Erasing Invisible Watermarks via Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {82--91},
}
Prune Wisely, Reconstruct Sharply: Compact 3D Gaussian Splatting via Adaptive Pruning and Difference-of-Gaussian Primitives: Haoran Wang,

Guoxi Huang,

Fan Zhang,

David Bull,

Nantheera Anantrasirichai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Haoran and Huang, Guoxi and Zhang, Fan and Bull, David and Anantrasirichai, Nantheera},
  title     = {Prune Wisely, Reconstruct Sharply: Compact {3D} {Gaussian} Splatting via Adaptive Pruning and {Difference-of-Gaussian} Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11716--11725},
}
Paparazzo: Active Mapping of Moving 3D Objects: Davide Allegro,

Shiyao Li,

Stefano Ghidoni,

Vincent Lepetit; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Allegro_2026_CVPR,
  author    = {Allegro, Davide and Li, Shiyao and Ghidoni, Stefano and Lepetit, Vincent},
  title     = {Paparazzo: Active Mapping of Moving {3D} Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12586--12594},
}
FlowFM: Advancing Dark Optical Flow Estimation with Flow Matching: Fengyuan Zuo,

Haiyan Jin,

Yuanlin Zhang,

Zhaolin Xiao,

Bin Wang,

Yuerong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Fengyuan and Jin, Haiyan and Zhang, Yuanlin and Xiao, Zhaolin and Wang, Bin and Mu, Yuerong},
  title     = {{FlowFM}: Advancing Dark Optical Flow Estimation with Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6837--6846},
}
Residual Diffusion Bridge Model for Image Restoration: Hebaixu Wang,

Jing Zhang,

Haoyang Chen,

Haonan Guo,

Di Wang,

Jiayi Ma,

Bo Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hebaixu and Zhang, Jing and Chen, Haoyang and Guo, Haonan and Wang, Di and Ma, Jiayi and Du, Bo},
  title     = {Residual Diffusion Bridge Model for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8375--8386},
}
Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation: Jonas Ernst,

Wolfgang Boettcher,

Lukas Hoyer,

Jan Eric Lenssen,

Bernt Schiele; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ernst_2026_CVPR,
  author    = {Ernst, Jonas and Boettcher, Wolfgang and Hoyer, Lukas and Lenssen, Jan Eric and Schiele, Bernt},
  title     = {Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13091--13101},
}
Keep it SymPL: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models: Jaeyun Jang,

Seunghui Shin,

Taeho Park,

Hyoseok Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Jaeyun and Shin, Seunghui and Park, Taeho and Hwang, Hyoseok},
  title     = {Keep it {SymPL}: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9604--9614},
}
QueryMe: Query-Driven Open-Vocabulary 3D Object Affordances Grounding from Multimodal Evidence: Weiyu Zhao,

Ru Li,

Jiaqi Liu,

Sizhe Zhao,

Qinglin Liu,

Shengping Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Weiyu and Li, Ru and Liu, Jiaqi and Zhao, Sizhe and Liu, Qinglin and Zhang, Shengping},
  title     = {{QueryMe}: Query-Driven Open-Vocabulary {3D} Object Affordances Grounding from Multimodal Evidence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2603--2612},
}
DREAM: Document Recognition with Explicit Adaptive Memory: Tianqi Zhao,

Di Wu,

Liangrui Peng,

Yifan Huang,

Kemeng Zhao,

Shuo Li,

Zhiyu Li,

Yizhu Wang,

Borui Jiang,

Yuyang Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Tianqi and Wu, Di and Peng, Liangrui and Huang, Yifan and Zhao, Kemeng and Li, Shuo and Li, Zhiyu and Wang, Yizhu and Jiang, Borui and Li, Yuyang},
  title     = {{DREAM}: Document Recognition with Explicit Adaptive Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2715--2724},
}
RHINO: Reconstructing Human Interactions with Novel Objects from Monocular Videos: Lixin Xue,

Chengwei Zheng,

Georgios Paschalidis,

Chen Guo,

Manuel Kaufmann,

Juan Zarate,

Dimitrios Tzionas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Lixin and Zheng, Chengwei and Paschalidis, Georgios and Guo, Chen and Kaufmann, Manuel and Zarate, Juan and Tzionas, Dimitrios},
  title     = {{RHINO}: Reconstructing Human Interactions with Novel Objects from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13835--13845},
}
Geoint-R1: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions: Jingxuan Wei,

Caijun Jia,

Qi Chen,

Honghao He,

Linzhuang Sun,

Conghui He,

Lijun Wu,

Bihui Yu,

Cheng Tan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jingxuan and Jia, Caijun and Chen, Qi and He, Honghao and Sun, Linzhuang and He, Conghui and Wu, Lijun and Yu, Bihui and Tan, Cheng},
  title     = {{Geoint-R1}: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2547--2556},
}
Geometric Neural Distance Fields for Learning Human Motion Priors: Zhengdi Yu,

Simone Foti,

Linguang Zhang,

g921@gmail.com Meta Reality Labs,

Amy Zhao,

Cem Keskin,

Stefanos Zafeiriou,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Zhengdi and Foti, Simone and Zhang, Linguang and {g921@gmail.com Meta Reality Labs} and Zhao, Amy and Keskin, Cem and Zafeiriou, Stefanos and Birdal, Tolga},
  title     = {Geometric Neural Distance Fields for Learning Human Motion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2232--2242},
  internal-note = {NOTE(review): the fourth author is an e-mail address plus an affiliation, not a personal name. It is kept verbatim as a braced literal so it is no longer split into surname Labs; replace it with the author's real name once confirmed.},
}
Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model: Yulong Shi,

Shijie Li,

Ziyi Li,

Lin Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yulong and Li, Shijie and Li, Ziyi and Qi, Lin},
  title     = {{Tell2Adapt}: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6941--6950},
}
Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models: Qing Zhang,

Xuesong Li,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qing and Li, Xuesong and Zhang, Jing},
  title     = {Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2526--2536},
}
The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery: Haiyang Zheng,

Nan Pu,

Yaqi Cai,

Teng Long,

Wenjing Li,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Haiyang and Pu, Nan and Cai, Yaqi and Long, Teng and Li, Wenjing and Sebe, Nicu and Zhong, Zhun},
  title     = {The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3563--3573},
}
IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting: Tao Zhang,

Yuyang Hong,

Yang Xia,

Kun Ding,

Zeyu Zhang,

Ying Wang,

Shiming Xiang,

Chunhong Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tao and Hong, Yuyang and Xia, Yang and Ding, Kun and Zhang, Zeyu and Wang, Ying and Xiang, Shiming and Pan, Chunhong},
  title     = {{IF-Bench}: Benchmarking and Enhancing {MLLMs} for Infrared Images with Generative Visual Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8205--8215},
}
DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs: Nikhil Behari,

Diego Rivero,

Luke Apostolides,

Suman Ghosh,

Paul Pu Liang,

Ramesh Raskar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behari_2026_CVPR,
  author    = {Behari, Nikhil and Rivero, Diego and Apostolides, Luke and Ghosh, Suman and Liang, Paul Pu and Raskar, Ramesh},
  title     = {{DENALI}: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost {LiDARs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3046--3055},
}
FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting: Matteo Ballegeer,

Dries F. Benoit; [pdf] [arXiv]
[bibtex]
@InProceedings{Ballegeer_2026_CVPR,
  author    = {Ballegeer, Matteo and Benoit, Dries F.},
  title     = {{FoV-Net}: Rotation-Invariant {CAD} {B-rep} Learning via Field-of-View Ray Casting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3024--3034},
}
RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding: Hanqing Liu,

Mingjie Liu,

Luoping Cui,

Endian Lin,

Donghong Jiang,

Chuang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Lin, Endian and Jiang, Donghong and Zhu, Chuang},
  title     = {{RE-VLM}: Event-Augmented Vision-Language Model for Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10377--10386},
}
Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments: Jianhui Wu,

Jian Zhou,

Zhi Zhou,

Zhangjin Huang,

Chao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jianhui and Zhou, Jian and Zhou, Zhi and Huang, Zhangjin and Li, Chao},
  title     = {Neural Dynamic {GI}: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5316--5325},
}
HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning: Xuerui Zhang,

Xuehao Wang,

Zhan Zhuang,

Linglan Zhao,

Ziyue Li,

Xinmin Zhang,

Zhihuan Song,

Yu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuerui and Wang, Xuehao and Zhuang, Zhan and Zhao, Linglan and Li, Ziyue and Zhang, Xinmin and Song, Zhihuan and Zhang, Yu},
  title     = {{HAD}: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10863--10873},
}
Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video: Chanhyuk Choi,

Taesoo Kim,

Donggyu Lee,

Siyeol Jung,

Taehwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Chanhyuk and Kim, Taesoo and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
  title     = {Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1759--1770},
}
Harnessing the Power of Foundation Models for Accurate Material Classification: Qingran Lin,

Fengwei Yang,

Chaolun Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Qingran and Yang, Fengwei and Zhu, Chaolun},
  title     = {Harnessing the Power of Foundation Models for Accurate Material Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3636--3645},
}
OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery: Yiwen Zhao,

Ce Zheng,

Yufu Wang,

Hsueh-Han Daniel Yang,

Liting Wen,

László A. Jeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yiwen and Zheng, Ce and Wang, Yufu and Yang, Hsueh-Han Daniel and Wen, Liting and Jeni, L{\'a}szl{\'o} A.},
  title     = {{OnlineHMR}: Video-based Online World-Grounded Human Mesh Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13951--13961},
}
I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers: Youngjun Jun,

Seil Kang,

Woojung Han,

Seong Jae Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Jun_2026_CVPR,
  author    = {Jun, Youngjun and Kang, Seil and Han, Woojung and Hwang, Seong Jae},
  title     = {I'm a Map! {Interpretable} Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11525--11535},
}
Parallel Jacobi Decoding for Fast Autoregressive Image Generation: Boya Liao,

Ying Li,

Siyong Jian,

Huan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Boya and Li, Ying and Jian, Siyong and Wang, Huan},
  title     = {Parallel {Jacobi} Decoding for Fast Autoregressive Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9008--9018},
}
HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation: Jiawen Li,

Fei Jiang,

Dandan Zhu,

Aimin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiawen and Jiang, Fei and Zhu, Dandan and Zhou, Aimin},
  title     = {{HamiPose}: {Hamiltonian} Optimization for Unsupervised Domain Adaptive Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13856--13865},
}
CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization: Naiyu Yin,

Hanjing Wang,

Yue Yu,

Tian Gao,

Amit Dhurandhar,

Chung-Hao Lee,

Qiang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Naiyu and Wang, Hanjing and Yu, Yue and Gao, Tian and Dhurandhar, Amit and Lee, Chung-Hao and Ji, Qiang},
  title     = {{CGU-Bayes}: Causal Graph Uncertainty-Guided {Bayesian} Inference for Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10522--10532},
}
MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting: Yongjian Liao,

Xu Zou,

Wenjun Chen,

Huixuan Li,

Xiaoen Xie,

Chunxi Li,

Shixiang Huang,

Gang Zhang,

Jiahuan Zhou,

Sheng Zhong,

Luxin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Yongjian and Zou, Xu and Chen, Wenjun and Li, Huixuan and Xie, Xiaoen and Li, Chunxi and Huang, Shixiang and Zhang, Gang and Zhou, Jiahuan and Zhong, Sheng and Yan, Luxin},
  title     = {{MSCD-GS}: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11726--11735},
}
3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding: Xiaoye Wang,

Chen Tang,

Xiangyu Yue,

Wei-Hong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaoye and Tang, Chen and Yue, Xiangyu and Li, Wei-Hong},
  title     = {{3D}-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5793--5803},
}
Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening: Junfeng Li,

Wenyang Zhou,

Xueheng Li,

Xuanhua He,

Jianhou Gan,

Wenqi Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junfeng and Zhou, Wenyang and Li, Xueheng and He, Xuanhua and Gan, Jianhou and Ren, Wenqi},
  title     = {Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order {RWKV} for Pan-Sharpening},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13234--13243},
}
Repurposing 3D Generative Model for Autoregressive Layout Generation: Haoran Feng,

Yifan Niu,

Zehuan Huang,

Yang-Tian Sun,

Chunchao Guo,

Yuxin Peng,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Haoran and Niu, Yifan and Huang, Zehuan and Sun, Yang-Tian and Guo, Chunchao and Peng, Yuxin and Sheng, Lu},
  title     = {Repurposing {3D} Generative Model for Autoregressive Layout Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3231--3243},
}
RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations: Mochu Xiang,

Zhelun Shen,

Xuesong Li,

Jiahui Ren,

Jing Zhang,

Chen Zhao,

Shanshan Liu,

Haocheng Feng,

Jingdong Wang,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Mochu and Shen, Zhelun and Li, Xuesong and Ren, Jiahui and Zhang, Jing and Zhao, Chen and Liu, Shanshan and Feng, Haocheng and Wang, Jingdong and Dai, Yuchao},
  title     = {{RnG}: A Unified Transformer for Complete {3D} Modeling from Partial Observations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {406--416},
}
AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection: Long Chen,

Hui Wang,

Man Xu,

Zexuan Li,

Zizhu Fan; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Long and Wang, Hui and Xu, Man and Li, Zexuan and Fan, Zizhu},
  title     = {{AKCMamba-YOLO}: Selective State Space Models For Real-Time Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4438--4447},
}
Global Information Thresholding for Sufficient and Necessary Circuits: Jegyeong Cho; [pdf]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Jegyeong},
  title     = {Global Information Thresholding for Sufficient and Necessary Circuits},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3264--3273},
}
GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration: Chaoqun Sun,

Zongjing Fu,

Powei Chang,

Jinpeng Zhang,

Jianxiang Xiang,

Yukang Gao,

Chenyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Chaoqun and Fu, Zongjing and Chang, Powei and Zhang, Jinpeng and Xiang, Jianxiang and Gao, Yukang and Wang, Chenyu},
  title     = {{GeoRK2}: Geometry-Guided {Runge-Kutta} Integration for Diffusion Transformer Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9404--9413},
}
WonderZoom: Multi-Scale 3D World Generation: Jin Cao,

Hong-Xing Yu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jin and Yu, Hong-Xing and Wu, Jiajun},
  title     = {{WonderZoom}: Multi-Scale {3D} World Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5859--5869},
}
Concept-Aware Batch Sampling Improves Language-Image Pretraining: Adhiraj Ghosh,

Vishaal Udandarao,

Thao Nguyen,

Matteo Farina,

Mehdi Cherti,

Jenia Jitsev,

Sewoong Oh,

Elisa Ricci,

Ludwig Schmidt,

Matthias Bethge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Adhiraj and Udandarao, Vishaal and Nguyen, Thao and Farina, Matteo and Cherti, Mehdi and Jitsev, Jenia and Oh, Sewoong and Ricci, Elisa and Schmidt, Ludwig and Bethge, Matthias},
  title     = {Concept-Aware Batch Sampling Improves Language-Image Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3056--3068},
}
GeoFree-CoSeg: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment: Xin Duan,

Xiabi Liu,

Liyuan Pan; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Xin and Liu, Xiabi and Pan, Liyuan},
  title     = {{GeoFree-CoSeg}: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10778--10788},
}
Any Resolution Any Geometry: From Multi-View To Multi-Patch: Wenqing Cui,

Zhenyu Li,

Mykola Lavreniuk,

Jian Shi,

Ramzi Idoughi,

Xiangjun Tang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Wenqing and Li, Zhenyu and Lavreniuk, Mykola and Shi, Jian and Idoughi, Ramzi and Tang, Xiangjun and Wonka, Peter},
  title     = {Any Resolution Any Geometry: From Multi-View To Multi-Patch},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12576--12585},
}
Ultrasound-CLIP: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding: Jiayun Jin,

Haolong Chai,

Xueying Huang,

Xiaoqing Guo,

Zengwei Zheng,

Zhan Zhou,

Junmei Wang,

Xinyu Wang,

Jie Liu,

Binbin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Jiayun and Chai, Haolong and Huang, Xueying and Guo, Xiaoqing and Zheng, Zengwei and Zhou, Zhan and Wang, Junmei and Wang, Xinyu and Liu, Jie and Zhou, Binbin},
  title     = {{Ultrasound-CLIP}: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6962--6971},
}
Mitigating Error Amplification in Fast Adversarial Training: Mengnan Zhao,

Lihe Zhang,

Bo Wang,

Tianhang Zheng,

Hong Zhong,

Geyong Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Mengnan and Zhang, Lihe and Wang, Bo and Zheng, Tianhang and Zhong, Hong and Min, Geyong},
  title     = {Mitigating Error Amplification in Fast Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13346--13355},
}
Complet4R: Geometric Complete 4D Reconstruction: Weibang Wang,

Kenan Li,

Zhuoguang Chen,

Yijun Yuan,

Hang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Weibang and Li, Kenan and Chen, Zhuoguang and Yuan, Yijun and Zhao, Hang},
  title     = {{Complet4R}: Geometric Complete {4D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {341--351},
}
Cross-Scale Pansharpening via ScaleFormer and the PanScale Benchmark: Ke Cao,

Xuanhua He,

Xueheng Li,

Lingting Zhu,

Yingying Wang,

Ao Ma,

Zhanjie Zhang,

Man Zhou,

Chengjun Xie,

Jie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Ke and He, Xuanhua and Li, Xueheng and Zhu, Lingting and Wang, Yingying and Ma, Ao and Zhang, Zhanjie and Zhou, Man and Xie, Chengjun and Zhang, Jie},
  title     = {Cross-Scale Pansharpening via {ScaleFormer} and the {PanScale} Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13211--13221},
}
MatLat: Material Latent Space for PBR Texture Generation: Kyeongmin Yeo,

Yunhong Min,

Jaihoon Kim,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2026_CVPR,
  author    = {Yeo, Kyeongmin and Min, Yunhong and Kim, Jaihoon and Sung, Minhyuk},
  title     = {{MatLat}: Material Latent Space for {PBR} Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4602--4612},
}
LoG3D: Ultra-High-Resolution 3D Shape Modeling via Local-to-Global Partitioning: Xinran Yang,

Shuichang Lai,

Jiangjing Lyu,

Hongjie Li,

Bowen Pan,

Yuanqi Li,

Jie Guo,

Zhengkang Zhou,

Yanwen Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xinran and Lai, Shuichang and Lyu, Jiangjing and Li, Hongjie and Pan, Bowen and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen},
  title     = {{LoG3D}: Ultra-High-Resolution {3D} Shape Modeling via Local-to-Global Partitioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5945--5955},
}
StaR-KVQA: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering: Zhihao Wen,

Wenkang Wei,

Yuan Fang,

Xingtong Yu,

Hui Zhang,

Weicheng Zhu,

Xin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Zhihao and Wei, Wenkang and Fang, Yuan and Yu, Xingtong and Zhang, Hui and Zhu, Weicheng and Zhang, Xin},
  title     = {{StaR-KVQA}: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5114--5124},
}
A Causal Marriage between VLM and IRM from Understanding to Reasoning: Ziliang Chen,

Tianang Xiao,

Jusheng Zhang,

Yongsen Zheng,

Yang Liu,

Zhao-rong Lai,

Liang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Xiao, Tianang and Zhang, Jusheng and Zheng, Yongsen and Liu, Yang and Lai, Zhao-rong and Lin, Liang},
  title     = {A Causal Marriage between {VLM} and {IRM} from Understanding to Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4749--4760},
}
Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization: Yifan Du,

Kun Zhou,

Yingqian Min,

Yue Ling,

Wayne Xin Zhao,

Youbin Wu,

Ji-Rong Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Yifan and Zhou, Kun and Min, Yingqian and Ling, Yue and Zhao, Wayne Xin and Wu, Youbin and Wen, Ji-Rong},
  title     = {Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12019--12029},
}
Leveraging Multispectral Sensors for Color Correction in Mobile Cameras: Luca Cogo,

Marco Buzzelli,

Simone Bianco,

Javier Vazquez-Corral,

Raimondo Schettini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cogo_2026_CVPR,
  author    = {Cogo, Luca and Buzzelli, Marco and Bianco, Simone and Vazquez-Corral, Javier and Schettini, Raimondo},
  title     = {Leveraging Multispectral Sensors for Color Correction in Mobile Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12438--12447},
}
Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression VAEs: Sicheng Xu,

Yu Deng,

Shoukang Hu,

Yichuan Wang,

Yizhong Zhang,

Zhan Chen,

Jiaolong Yang,

Baining Guo; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Sicheng and Deng, Yu and Hu, Shoukang and Wang, Yichuan and Zhang, Yizhong and Chen, Zhan and Yang, Jiaolong and Guo, Baining},
  title     = {Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression {VAEs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9284--9295},
}
Anchoring and Rescaling Attention for Semantically Coherent Inbetweening: Tae Eun Choi,

Sumin Shim,

Junhyeok Kim,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Tae Eun and Shim, Sumin and Kim, Junhyeok and Hwang, Seong Jae},
  title     = {Anchoring and Rescaling Attention for Semantically Coherent Inbetweening},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8976--8985},
}
Beyond Graph Model: Reliable VLM Fine-Tuning via Random Graph Adapter: Bo Jiang,

Xueyang Ze,

Beibei Wang,

Xixi Wang,

Xixi Wan,

Bin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Bo and Ze, Xueyang and Wang, Beibei and Wang, Xixi and Wan, Xixi and Luo, Bin},
  title     = {Beyond Graph Model: Reliable {VLM} Fine-Tuning via Random Graph Adapter},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11664--11673},
}
HulluEdit: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models: Yangguang Lin,

Quan Fang,

Yufei Li,

Jiachen Sun,

Junyu Gao,

Jitao Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yangguang and Fang, Quan and Li, Yufei and Sun, Jiachen and Gao, Junyu and Sang, Jitao},
  title     = {{HulluEdit}: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11086--11095},
}
LiveGesture: Streamable Co-Speech Gesture Generation Model: Muhammad Usama Saleem,

Mayur Jagdishbhai Patel,

Ekkasit Pinyoanuntapong,

Zhongxing Qin,

Li Yang,

Hongfei Xue,

Ahmed Helmy,

Chen Chen,

Pu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saleem_2026_CVPR,
  author    = {Saleem, Muhammad Usama and Patel, Mayur Jagdishbhai and Pinyoanuntapong, Ekkasit and Qin, Zhongxing and Yang, Li and Xue, Hongfei and Helmy, Ahmed and Chen, Chen and Wang, Pu},
  title     = {{LiveGesture}: Streamable Co-Speech Gesture Generation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2264--2273},
}
PercHead: Perceptual Head Model for Single-Image 3D Head Reconstruction & Editing: Antonio Oroz,

Matthias Nießner,

Tobias Kirschstein; [pdf] [supp]
[bibtex]
@InProceedings{Oroz_2026_CVPR,
  author    = {Oroz, Antonio and Nie{\ss}ner, Matthias and Kirschstein, Tobias},
  title     = {{PercHead}: Perceptual Head Model for Single-Image {3D} Head Reconstruction \& Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4097--4108},
}
Affine Perspective-Three-Point Problem: Gaku Nakano; [pdf] [supp]
[bibtex]
@InProceedings{Nakano_2026_CVPR,
  author    = {Nakano, Gaku},
  title     = {Affine Perspective-Three-Point Problem},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12217--12226},
}
Refacade: Editing Object with Given Reference Texture: Youze Huang,

Penghui Ruan,

Bojia Zi,

Xianbiao Qi,

Jianan Wang,

Rong Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Youze and Ruan, Penghui and Zi, Bojia and Qi, Xianbiao and Wang, Jianan and Xiao, Rong},
  title     = {Refacade: Editing Object with Given Reference Texture},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1961--1972},
}
Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks: Zhichao Yang,

Jianjie Wang,

Zhixianhe Zhang,

Pangu Xie,

Xiangfei Sheng,

Pengfei Chen,

Leida Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhichao and Wang, Jianjie and Zhang, Zhixianhe and Xie, Pangu and Sheng, Xiangfei and Chen, Pengfei and Li, Leida},
  title     = {Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {145--155},
}
LiteSense: Lifting Lightweight ToF with RGB for High-Resolution Metric Depth Estimation: Yusheng Li,

Lizhi Lou,

Yan Tang,

Zekai Miao,

Shaoming Zhang,

Jianmei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yusheng and Lou, Lizhi and Tang, Yan and Miao, Zekai and Zhang, Shaoming and Wang, Jianmei},
  title     = {{LiteSense}: Lifting Lightweight {ToF} with {RGB} for High-Resolution Metric Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5783--5792},
}
COG: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation: Yuchen Che,

Jingtu Wu,

Hao Zheng,

Asako Kanezaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
  author    = {Che, Yuchen and Wu, Jingtu and Zheng, Hao and Kanezaki, Asako},
  title     = {{COG}: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11567--11578},
}
Long-Tail Internet Photo Reconstruction: Yuan Li,

Yuanbo Xiangli,

Hadar Averbuch-Elor,

Noah Snavely,

Ruojin Cai; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuan and Xiangli, Yuanbo and Averbuch-Elor, Hadar and Snavely, Noah and Cai, Ruojin},
  title     = {Long-Tail Internet Photo Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {417--426},
}
Write Where It Matters: Policy-Guided Watermarks for 3D Gaussian Splatting: Nan Li,

Yike Zeng,

Qian Zhang,

Qi Zhang,

Zhiyi Pan,

Wei Feng,

Liang Wan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Nan and Zeng, Yike and Zhang, Qian and Zhang, Qi and Pan, Zhiyi and Feng, Wei and Wan, Liang},
  title     = {Write Where It Matters: Policy-Guided Watermarks for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6580--6590},
}
WikiCLIP: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition: Shan Ning,

Longtian Qiu,

Jiaxuan Sun,

Xuming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Shan and Qiu, Longtian and Sun, Jiaxuan and He, Xuming},
  title     = {{WikiCLIP}: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1596--1605},
}
RaPA: Enhancing Transferable Targeted Attacks via Random Parameter Pruning: Tongrui Su,

Qingbin Li,

Shengyu Zhu,

Wei Chen,

Xueqi Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Tongrui and Li, Qingbin and Zhu, Shengyu and Chen, Wei and Cheng, Xueqi},
  title     = {{RaPA}: Enhancing Transferable Targeted Attacks via Random Parameter Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6538--6548},
}
X-band Radar Non-Line-of-Sight Imaging: Dongyu Du,

Mingkun Zhao,

Yutong Yang,

Dominik Scheuble,

Xiaolong Huang,

Zijian Shao,

Mario Bijelic,

Kaushik Sengupta,

Felix Heide; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Dongyu and Zhao, Mingkun and Yang, Yutong and Scheuble, Dominik and Huang, Xiaolong and Shao, Zijian and Bijelic, Mario and Sengupta, Kaushik and Heide, Felix},
  title     = {X-band Radar Non-Line-of-Sight Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5647--5658},
}
DiverseGRPO: Mitigating Mode Collapse in Image Generation via Diversity-Aware GRPO: Henglin Liu,

Huijuan Huang,

Jing Wang,

Chang Liu,

Xiu Li,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Henglin and Huang, Huijuan and Wang, Jing and Liu, Chang and Li, Xiu and Ji, Xiangyang},
  title     = {{DiverseGRPO}: Mitigating Mode Collapse in Image Generation via Diversity-Aware {GRPO}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1864--1873},
}
Breaking Smooth-Motion Assumptions: A UAV Benchmark for Multi-Object Tracking in Complex and Adverse Conditions: Jingtao Ye,

Kexin Zhang,

Xunchi Ma,

Yuechan Li,

Guangming Zhu,

Peiyi Shen,

Linhua Jiang,

Xiangdong Zhang,

Liang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jingtao and Zhang, Kexin and Ma, Xunchi and Li, Yuechan and Zhu, Guangming and Shen, Peiyi and Jiang, Linhua and Zhang, Xiangdong and Zhang, Liang},
  title     = {Breaking Smooth-Motion Assumptions: A {UAV} Benchmark for Multi-Object Tracking in Complex and Adverse Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13594--13603},
}
Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models: Hengzhuang Li,

Xinsong Zhang,

Qiming Peng,

Bin Luo,

Han Hu,

Dengyang Jiang,

Han-Jia Ye,

Teng Zhang,

Hai Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hengzhuang and Zhang, Xinsong and Peng, Qiming and Luo, Bin and Hu, Han and Jiang, Dengyang and Ye, Han-Jia and Zhang, Teng and Jin, Hai},
  title     = {Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1771--1786},
}
MotionEdit: Benchmarking and Learning Motion-Centric Image Editing: Yixin Wan,

Lei Ke,

Wenhao Yu,

Kai-Wei Chang,

Dong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
  author    = {Wan, Yixin and Ke, Lei and Yu, Wenhao and Chang, Kai-Wei and Yu, Dong},
  title     = {{MotionEdit}: Benchmarking and Learning Motion-Centric Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9263--9272},
}
OMG-Avatar: One-shot Multi-LOD Gaussian Head Avatar: Jianqiang Ren,

Lin Liu,

Steven Hoi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Jianqiang and Liu, Lin and Hoi, Steven},
  title     = {{OMG-Avatar}: One-shot Multi-{LOD} {Gaussian} Head Avatar},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11017--11028},
}
Reflection Separation from a Single Image via Joint Latent Diffusion: Zheng-Hui Huang,

Zhixiang Wang,

Yu-Lun Liu,

Yung-Yu Chuang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zheng-Hui and Wang, Zhixiang and Liu, Yu-Lun and Chuang, Yung-Yu},
  title     = {Reflection Separation from a Single Image via Joint Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4569--4579},
}
Envisioning the Future, One Step at a Time: Stefan Andreas Baumann,

Jannik Wiese,

Tommaso Martorella,

Mahdi M. Kalayeh,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baumann_2026_CVPR,
  author    = {Baumann, Stefan Andreas and Wiese, Jannik and Martorella, Tommaso and Kalayeh, Mahdi M. and Ommer, Bj{\"o}rn},
  title     = {Envisioning the Future, One Step at a Time},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6823--6836},
}
MeToM: Metadata-Guided Token Merging for Efficient Video LLMs: Zhuojie Wu,

Shijie Wang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhuojie and Wang, Shijie and Yu, Xin},
  title     = {{MeToM}: Metadata-Guided Token Merging for Efficient Video {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10441--10450},
}
Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for AU Detection: Yuru Wang,

Yue Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuru and Zhou, Yue},
  title     = {Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for {AU} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7165--7174},
}
Self-Evaluation Unlocks Any-Step Text-to-Image Generation: Xin Yu,

Xiaojuan Qi,

Zhengqi Li,

Kai Zhang,

Richard Zhang,

Zhe Lin,

Eli Shechtman,

Tianyu Wang,

Yotam Nitzan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xin and Qi, Xiaojuan and Li, Zhengqi and Zhang, Kai and Zhang, Richard and Lin, Zhe and Shechtman, Eli and Wang, Tianyu and Nitzan, Yotam},
  title     = {Self-Evaluation Unlocks Any-Step Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7816--7826},
}
LuxRemix: Lighting Decomposition and Remixing for Indoor Scenes: Ruofan Liang,

Norman Müller,

Ethan Weber,

Duncan Zauss,

Nandita Vijaykumar,

Peter Kontschieder,

Christian Richardt; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Ruofan and M{\"u}ller, Norman and Weber, Ethan and Zauss, Duncan and Vijaykumar, Nandita and Kontschieder, Peter and Richardt, Christian},
  title     = {{LuxRemix}: Lighting Decomposition and Remixing for Indoor Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1100--1111},
}
AVATAR: Reinforcement Learning to See, Hear, and Reason Over Video: Yogesh Kulkarni,

Pooyan Fazli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2026_CVPR,
  author    = {Kulkarni, Yogesh and Fazli, Pooyan},
  title     = {{AVATAR}: Reinforcement Learning to See, Hear, and Reason Over Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7912--7922},
}
VGGDrive: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving: Jie Wang,

Guang Li,

Zhijian Huang,

Chenxu Dang,

Hangjun Ye,

Yahong Han,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jie and Li, Guang and Huang, Zhijian and Dang, Chenxu and Ye, Hangjun and Han, Yahong and Chen, Long},
  title     = {{VGGDrive}: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10954--10964},
}
FBTA: Enabling Single-GPU End-to-End Gigapixel WSI Classification with Feature Bridging and Translation Alignment: Jiuyang Dong,

Jiahan Li,

Junjun Jiang,

Yongbing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
    author    = {Dong, Jiuyang and Li, Jiahan and Jiang, Junjun and Zhang, Yongbing},
    title     = {{FBTA}: Enabling Single-{GPU} End-to-End Gigapixel {WSI} Classification with Feature Bridging and Translation Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7026--7035}
}
Agile Deliberation: Concept Deliberation for Subjective Visual Classification: Leijie Wang,

Otilia Stretcu,

Wei Qiao,

Thomas Denby,

Krishnamurthy Viswanathan,

Enming Luo,

Chun-Ta Lu,

Tushar Dogra,

Ranjay Krishna,

Ariel Fuxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Leijie and Stretcu, Otilia and Qiao, Wei and Denby, Thomas and Viswanathan, Krishnamurthy and Luo, Enming and Lu, Chun-Ta and Dogra, Tushar and Krishna, Ranjay and Fuxman, Ariel},
    title     = {Agile Deliberation: Concept Deliberation for Subjective Visual Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4794--4804}
}
EMMA: Extracting Multiple physical parameters from Multimodal Data: Farhat Shaikh,

Ayan Banerjee,

Sandeep Gupta; [pdf] [supp]
[bibtex]
@InProceedings{Shaikh_2026_CVPR,
    author    = {Shaikh, Farhat and Banerjee, Ayan and Gupta, Sandeep},
    title     = {{EMMA}: Extracting Multiple physical parameters from Multimodal Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1716--1725}
}
MS-Temba: Multi-Scale Temporal Mamba for Understanding Long Untrimmed Videos: Arkaprava Sinha,

Monish Soundar Raj,

Pu Wang,

Ahmed Helmy,

Hieu Le,

Srijan Das; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sinha_2026_CVPR,
    author    = {Sinha, Arkaprava and Raj, Monish Soundar and Wang, Pu and Helmy, Ahmed and Le, Hieu and Das, Srijan},
    title     = {{MS-Temba}: Multi-Scale Temporal {Mamba} for Understanding Long Untrimmed Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9815--9826}
}
HAVE-Bench: Hierarchical Audio-Visual Evaluation from Perception to Interaction: Muyan Zhong,

Erfei Cui,

Sen Xing,

Weiyun Wang,

Wen Wu,

Yuchen Hu,

Yanting Zhang,

Xiaowei Hu,

Wenhai Wang,

Chao Zhang,

Jifeng Dai; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
    author    = {Zhong, Muyan and Cui, Erfei and Xing, Sen and Wang, Weiyun and Wu, Wen and Hu, Yuchen and Zhang, Yanting and Hu, Xiaowei and Wang, Wenhai and Zhang, Chao and Dai, Jifeng},
    title     = {{HAVE-Bench}: Hierarchical Audio-Visual Evaluation from Perception to Interaction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8801--8812}
}
UAST: Unified Active Search and Tracking for Arbitrary Targets with UAVs: Liang Qin,

Min Wang,

Xingyu Lu,

Aowen Qiu,

Wengang Zhou,

Houqiang Li; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Liang and Wang, Min and Lu, Xingyu and Qiu, Aowen and Zhou, Wengang and Li, Houqiang},
    title     = {{UAST}: Unified Active Search and Tracking for Arbitrary Targets with {UAVs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13464--13473}
}
TerraSeg: Self-Supervised Ground Segmentation for Any LiDAR: Ted Lentsch,

Santiago Montiel-Marín,

Holger Caesar,

Dariu M. Gavrila; [pdf] [supp]
[bibtex]
@InProceedings{Lentsch_2026_CVPR,
    author    = {Lentsch, Ted and Montiel-Mar{\'\i}n, Santiago and Caesar, Holger and Gavrila, Dariu M.},
    title     = {{TerraSeg}: Self-Supervised Ground Segmentation for Any {LiDAR}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10040--10050}
}
META: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding: Jing Huang,

Luyuan Chen,

Zhijie Xu,

Yadong Li,

Xingzhong Xu,

Siye Chen,

Jie Liu,

Ming Kong,

Qiang Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Jing and Chen, Luyuan and Xu, Zhijie and Li, Yadong and Xu, Xingzhong and Chen, Siye and Liu, Jie and Kong, Ming and Zhu, Qiang},
    title     = {{META}: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9837--9846}
}
Wanderland: Geometrically Grounded Simulation for Open-World Embodied AI: Xinhao Liu,

Jiaqi Li,

Youming Deng,

Ruxin Chen,

Yingjia Zhang,

Yifei Ma,

Li Guo,

Yiming Li,

Jing Zhang,

Chen Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Xinhao and Li, Jiaqi and Deng, Youming and Chen, Ruxin and Zhang, Yingjia and Ma, Yifei and Guo, Li and Li, Yiming and Zhang, Jing and Feng, Chen},
    title     = {Wanderland: Geometrically Grounded Simulation for Open-World Embodied {AI}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1041--1052}
}
Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation: Jun Wei,

Hui Huang; [pdf]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Jun and Huang, Hui},
    title     = {Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8525--8534}
}
Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling: Xinlei Yu,

Chengming Xu,

Zhangquan Chen,

Yudong Zhang,

Shilin Lu,

Cheng Yang,

Jiangning Zhang,

Shuicheng Yan,

Xiaobin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Xinlei and Xu, Chengming and Chen, Zhangquan and Zhang, Yudong and Lu, Shilin and Yang, Cheng and Zhang, Jiangning and Yan, Shuicheng and Hu, Xiaobin},
    title     = {Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12300--12311}
}
Bridging Human Evaluation to Infrared and Visible Image Fusion: Jinyuan Liu,

Xingyuan Li,

Qingyun Mei,

Haoyuan Xu,

Zhiying Jiang,

Long Ma,

Risheng Liu,

Xin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jinyuan and Li, Xingyuan and Mei, Qingyun and Xu, Haoyuan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
    title     = {Bridging Human Evaluation to Infrared and Visible Image Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12322--12333}
}
Speeding Up the Learning of 3D Gaussians with Much Shorter Gaussian Lists: Jiaqi Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jiaqi and Han, Zhizhong},
    title     = {Speeding Up the Learning of {3D} {Gaussians} with Much Shorter {Gaussian} Lists},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1231--1240}
}
Guiding a Diffusion Transformer with the Internal Dynamics of Itself: Xingyu Zhou,

Qifan Li,

Xiaobin Hu,

Hai Chen,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xingyu and Li, Qifan and Hu, Xiaobin and Chen, Hai and Gu, Shuhang},
    title     = {Guiding a Diffusion Transformer with the Internal Dynamics of Itself},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11536--11545}
}
DiG: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models: Zhou Tao,

Shida Wang,

YongXiang Hua,

Haoyu Cao,

Linli Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
    author    = {Tao, Zhou and Wang, Shida and Hua, YongXiang and Cao, Haoyu and Xu, Linli},
    title     = {{DiG}: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1695--1705}
}
From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding: Yuyuan Liu,

Yiping Ji,

Anjie Le,

Jiayuan Zhu,

Jiazhen Pan,

Can Peng,

Jiajun Deng,

Fengbei Liu,

Junde Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yuyuan and Ji, Yiping and Le, Anjie and Zhu, Jiayuan and Pan, Jiazhen and Peng, Can and Deng, Jiajun and Liu, Fengbei and Wu, Junde},
    title     = {From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4816--4828}
}
Understanding Counting Mechanisms in Large Language and Vision-Language Models: Hosein Hasani,

Amirmohammad Izadi,

Fatemeh Askari,

Mobin Bagherian,

Sadegh Mohammadian,

Mohammad Izadi,

Mahdieh Soleymani Baghshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasani_2026_CVPR,
    author    = {Hasani, Hosein and Izadi, Amirmohammad and Askari, Fatemeh and Bagherian, Mobin and Mohammadian, Sadegh and Izadi, Mohammad and Baghshah, Mahdieh Soleymani},
    title     = {Understanding Counting Mechanisms in Large Language and Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5125--5133}
}
MMDIR: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration: Heng Li,

Xingyuan Wang,

Yang Fan,

Yunan Zhang,

Xiangping Wu,

Qingcai Chen; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Heng and Wang, Xingyuan and Fan, Yang and Zhang, Yunan and Wu, Xiangping and Chen, Qingcai},
    title     = {{MMDIR}: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8387--8396}
}
ID-Sim: An Identity-Focused Similarity Metric: Julia Chae,

Nicholas Kolkin,

Jui-Hsien Wang,

Richard Zhang,

Sara Beery,

Cusuh Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chae_2026_CVPR,
    author    = {Chae, Julia and Kolkin, Nicholas and Wang, Jui-Hsien and Zhang, Richard and Beery, Sara and Ham, Cusuh},
    title     = {{ID-Sim}: An Identity-Focused Similarity Metric},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11250--11262}
}
Free-Lunch Long Video Generation via Layer-Adaptive O.O.D Correction: Jiahao Tian,

Chenxi Song,

Wei Cheng,

Chi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
    author    = {Tian, Jiahao and Song, Chenxi and Cheng, Wei and Zhang, Chi},
    title     = {Free-Lunch Long Video Generation via Layer-Adaptive {O.O.D} Correction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1973--1982}
}
Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models: Tao Qi,

Huili Wang,

Yuanhong Huang,

Wendan Wang,

Lianchao Zhao,

Jinrui Wang,

Zichen Qin,

Shangguang Wang,

Yongfeng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
    author    = {Qi, Tao and Wang, Huili and Huang, Yuanhong and Wang, Wendan and Zhao, Lianchao and Wang, Jinrui and Qin, Zichen and Wang, Shangguang and Huang, Yongfeng},
    title     = {Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {62--71}
}
Inconsistency-aware Multimodal Schrodinger Bridge for Deepfake Localization: Jiayu Xiong,

Jing Wang,

Qi Zhang,

Wanlong Wang,

Jun Xue; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
    author    = {Xiong, Jiayu and Wang, Jing and Zhang, Qi and Wang, Wanlong and Xue, Jun},
    title     = {Inconsistency-aware Multimodal {Schrodinger} Bridge for Deepfake Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8697--8706}
}
ViTPrompt: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection: Yitong Qin,

Lihua Zhou,

Jiwei Wei,

Ran Ran,

Shiyuan He,

Zeyu Ma,

Shuaifeng Li,

Nianxin Li,

Heng Tao Shen; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Yitong and Zhou, Lihua and Wei, Jiwei and Ran, Ran and He, Shiyuan and Ma, Zeyu and Li, Shuaifeng and Li, Nianxin and Shen, Heng Tao},
    title     = {{ViTPrompt}: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3111--3121}
}
HumanBA: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling: Fengyuan Yang,

Tanuj Sur,

Tze Ho Elden Tse,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Fengyuan and Sur, Tanuj and Tse, Tze Ho Elden and Yao, Angela},
    title     = {{HumanBA}: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13846--13855}
}
Beyond Heuristic Prompting: A Concept-Guided Bayesian Framework for Zero-Shot Image Recognition: Hui Liu,

Kecheng Chen,

Jialiang Wang,

Xianming Liu,

Wenya Wang,

Haoliang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hui and Chen, Kecheng and Wang, Jialiang and Liu, Xianming and Wang, Wenya and Li, Haoliang},
    title     = {Beyond Heuristic Prompting: A Concept-Guided {Bayesian} Framework for Zero-Shot Image Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5521--5531}
}
Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction: Wenfei Guan,

Jilin Mei,

Tong Shen,

Xumin Wu,

Shuo Wang,

Chen Min,

Yu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
    author    = {Guan, Wenfei and Mei, Jilin and Shen, Tong and Wu, Xumin and Wang, Shuo and Min, Chen and Hu, Yu},
    title     = {Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13254--13263}
}
Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric CT Transformers: Cris Claessens,

Christiaan Viviers,

Giacomo D'Amicantonio,

Egor Bondarev,

Fons van der Sommen; [pdf] [supp]
[bibtex]
@InProceedings{Claessens_2026_CVPR,
    author    = {Claessens, Cris and Viviers, Christiaan and D'Amicantonio, Giacomo and Bondarev, Egor and van der Sommen, Fons},
    title     = {Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric {CT} Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13636--13647}
}
SDDF: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection: Jiaming Liang,

Yifeng Zhan,

Chunlin Liu,

Weihua Zheng,

Bingye Peng,

Qiwei Liang,

Boyang Cai,

Xiaochun Mai,

Qiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Jiaming and Zhan, Yifeng and Liu, Chunlin and Zheng, Weihua and Peng, Bingye and Liang, Qiwei and Cai, Boyang and Mai, Xiaochun and Nie, Qiang},
    title     = {{SDDF}: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13049--13058}
}
Lens Component Deletion based on Differentiable Ray Tracing: Wenguan Zhang,

Qirun Zhang,

Tuo Sun,

Jiajian He,

Jiahui Xu,

Huajun Feng,

Qi Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Wenguan and Zhang, Qirun and Sun, Tuo and He, Jiajian and Xu, Jiahui and Feng, Huajun and Li, Qi},
    title     = {Lens Component Deletion based on Differentiable Ray Tracing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5637--5646}
}
MR. Illuminate: Zero-Shot Low-Light Image Enhancement with Diffusion Prior: Joshua Cho,

Sara Aghajanzadeh,

Zhen Zhu,

David Forsyth; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
    author    = {Cho, Joshua and Aghajanzadeh, Sara and Zhu, Zhen and Forsyth, David},
    title     = {{MR.} {Illuminate}: Zero-Shot Low-Light Image Enhancement with Diffusion Prior},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8460--8470}
}
GlyphPrinter: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering: Xincheng Shuai,

Ziye Li,

Henghui Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shuai_2026_CVPR,
    author    = {Shuai, Xincheng and Li, Ziye and Ding, Henghui and Tao, Dacheng},
    title     = {{GlyphPrinter}: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7674--7683}
}
F2Net: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation: Hengzhi Chen,

Liqian Feng,

Wenhua Wu,

Xiaogang Zhu,

Qiuxia Wu,

Lianlei Shan,

Kun Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Hengzhi and Feng, Liqian and Wu, Wenhua and Zhu, Xiaogang and Wu, Qiuxia and Shan, Lianlei and Hu, Kun},
    title     = {{F2Net}: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13275--13284}
}
Adversarial Style Optimization: Enhancing VLM Jailbreaks by GRPO-based Stylistic Triggers Optimization: Bingjun Luo,

Jialin Guo,

Yue Yao,

Xinpeng Ding; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Bingjun and Guo, Jialin and Yao, Yue and Ding, Xinpeng},
    title     = {Adversarial Style Optimization: Enhancing {VLM} Jailbreaks by {GRPO}-based Stylistic Triggers Optimization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11--19}
}
Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment: Tao Lin,

Yilei Zhong,

Yuxin Du,

Jingjing Zhang,

Jiting Liu,

Yinxinyu Chen,

Encheng Gu,

Ziyan Liu,

Hongyi Cai,

Yanwen Zou,

Lixing Zou,

Zhaoye Zhou,

Gen Li,

Bo Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Tao and Zhong, Yilei and Du, Yuxin and Zhang, Jingjing and Liu, Jiting and Chen, Yinxinyu and Gu, Encheng and Liu, Ziyan and Cai, Hongyi and Zou, Yanwen and Zou, Lixing and Zhou, Zhaoye and Li, Gen and Zhao, Bo},
    title     = {Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13397--13406}
}
Variational Graph-based Normal Integration: Lixiong Chen,

Bohan Yu,

Victor Adrian Prisacariu,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Lixiong and Yu, Bohan and Prisacariu, Victor Adrian and Sato, Imari},
    title     = {Variational Graph-based Normal Integration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12663--12672}
}
OpenVO: Open-World Visual Odometry with Temporal Dynamics Awareness: Phuc Nguyen,

Anh N. Nhu,

Ming C. Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Phuc and Nhu, Anh N. and Lin, Ming C.},
    title     = {{OpenVO}: Open-World Visual Odometry with Temporal Dynamics Awareness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14208--14218}
}
S2D: Sparse to Dense Lifting for 3D Reconstruction with Minimal Inputs: Yuzhou Ji,

Qijian Tian,

He Zhu,

Xiaoqi Jiang,

Guangzhi Cao,

Lizhuang Ma,

Yuan Xie,

Xin Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
    author    = {Ji, Yuzhou and Tian, Qijian and Zhu, He and Jiang, Xiaoqi and Cao, Guangzhi and Ma, Lizhuang and Xie, Yuan and Tan, Xin},
    title     = {{S2D}: Sparse to Dense Lifting for {3D} Reconstruction with Minimal Inputs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7491--7502}
}
NexusFlow: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks: Fangzhou Lin,

Yuping Wang,

Yuliang Guo,

Zixun Huang,

Xinyu Huang,

Haichong Zhang,

Kazunori Yamada,

Zhengzhong Tu,

Liu Ren,

Ziming Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Fangzhou and Wang, Yuping and Guo, Yuliang and Huang, Zixun and Huang, Xinyu and Zhang, Haichong and Yamada, Kazunori and Tu, Zhengzhong and Ren, Liu and Zhang, Ziming},
    title     = {{NexusFlow}: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3761--3771}
}
Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark: Lijing Cai,

Zhan Shi,

Chenglong Huang,

Jinyao Wu,

Qiping Li,

Zikang Huo,

Linsen Chen,

Chongde Zi,

Xun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Lijing and Shi, Zhan and Huang, Chenglong and Wu, Jinyao and Li, Qiping and Huo, Zikang and Chen, Linsen and Zi, Chongde and Cao, Xun},
    title     = {Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12522--12532}
}
InterRVOS: Interaction-Aware Referring Video Object Segmentation: Woojeong Jin,

Seongchan Kim,

Jaeho Lee,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Woojeong and Kim, Seongchan and Lee, Jaeho and Kim, Seungryong},
    title     = {{InterRVOS}: Interaction-Aware Referring Video Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10367--10376}
}
TopoSlide: Topologically-Informed Histopathology Whole Slide Image Representation Learning: Shahira Abousamra,

Asmita Sood,

Sylvia Plevritis; [pdf] [supp]
[bibtex]
@InProceedings{Abousamra_2026_CVPR,
    author    = {Abousamra, Shahira and Sood, Asmita and Plevritis, Sylvia},
    title     = {{TopoSlide}: Topologically-Informed Histopathology Whole Slide Image Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13691--13701}
}
Same Content, Different Answers: Cross-Modal Inconsistency in MLLMs: Angela van Sprang,

Laurens Samson,

Ana Lucic,

Erman Acar,

Sennay Ghebreab,

Yuki M. Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{van_Sprang_2026_CVPR,
    author    = {van Sprang, Angela and Samson, Laurens and Lucic, Ana and Acar, Erman and Ghebreab, Sennay and Asano, Yuki M.},
    title     = {Same Content, Different Answers: Cross-Modal Inconsistency in {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8781--8790}
}
LNEM: Lunar Neural Elevation Model: Suwan Lee,

Jo Ryeong Yim,

Kibaek Park,

Dong-Gyu Kim,

Eunhyeuk Kim,

Minsup Jeong,

Chae Kyung Sim,

Seokju Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Suwan and Yim, Jo Ryeong and Park, Kibaek and Kim, Dong-Gyu and Kim, Eunhyeuk and Jeong, Minsup and Sim, Chae Kyung and Lee, Seokju},
    title     = {{LNEM}: Lunar Neural Elevation Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6508--6517}
}
Omni IIE Bench: Benchmarking the Practical Capabilities of Image Editing Models: Yujia Yang,

Yuanxiang Wang,

Zhenyu Guan,

Tiankun Yang,

Chenxi Bao,

Haopeng Jin,

Jinwen Luo,

Xinyu Zuo,

Lisheng Duan,

Haijin Liang,

Jin Ma,

Xinming Wang,

Ruiwen Tao,

Hongzhu Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yujia and Wang, Yuanxiang and Guan, Zhenyu and Yang, Tiankun and Bao, Chenxi and Jin, Haopeng and Luo, Jinwen and Zuo, Xinyu and Duan, Lisheng and Liang, Haijin and Ma, Jin and Wang, Xinming and Tao, Ruiwen and Yi, Hongzhu},
    title     = {Omni {IIE} {Bench}: Benchmarking the Practical Capabilities of Image Editing Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1089--1099}
}
VibeToken: Scaling 1D Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations: Maitreya Patel,

Jingtao Li,

Weiming Zhuang,

Yezhou Yang,

Lingjuan Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patel_2026_CVPR,
    author    = {Patel, Maitreya and Li, Jingtao and Zhuang, Weiming and Yang, Yezhou and Lv, Lingjuan},
    title     = {{VibeToken}: Scaling {1D} Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {2058--2068}
}
CAD-Refiner: A Unified Framework for CAD Generation and Iterative Editing: Meng Yuan,

Dawei Lin,

Hongxia Xie,

Tieru Wu,

Rui Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
    author    = {Yuan, Meng and Lin, Dawei and Xie, Hongxia and Wu, Tieru and Ma, Rui},
    title     = {{CAD-Refiner}: A Unified Framework for {CAD} Generation and Iterative Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3244--3253}
}
When LoRA Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters: Liangwei Lyu,

Jiaqi Xu,

Jianwei Ding,

Qiyao Deng; [pdf] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
    author    = {Lyu, Liangwei and Xu, Jiaqi and Ding, Jianwei and Deng, Qiyao},
    title     = {When {LoRA} Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8577--8586}
}
Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation: Jihun Kim,

Hoyong Kwon,

Hyeokjun Kweon,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Yoon, Kuk-Jin},
    title     = {Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10766--10777}
}
ICTPolarReal: A Polarized Reflection and Material Dataset of Real World Objects: Jing Yang,

Krithika Dharanikota,

Emily Jia,

Haiwei Chen,

Yajie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Jing and Dharanikota, Krithika and Jia, Emily and Chen, Haiwei and Zhao, Yajie},
    title     = {{ICTPolarReal}: A Polarized Reflection and Material Dataset of Real World Objects},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6518--6527}
}
Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework: Linxiao Shi,

Siming Zheng,

Zerong Wang,

Hao Zhang,

Jinwei Chen,

Bo Li,

Shifeng Chen,

Peng-Tao Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Linxiao and Zheng, Siming and Wang, Zerong and Zhang, Hao and Chen, Jinwei and Li, Bo and Chen, Shifeng and Jiang, Peng-Tao},
    title     = {Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {231--240}
}
V2U4Real: A Real-world Large-scale Dataset for Vehicle-to-UAV Cooperative Perception: Weijia Li,

Haoen Xiang,

Tianxu Wang,

Shuaibing Wu,

Qiming Xia,

Cheng Wang,

Chenglu Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Weijia and Xiang, Haoen and Wang, Tianxu and Wu, Shuaibing and Xia, Qiming and Wang, Cheng and Wen, Chenglu},
    title     = {{V2U4Real}: A Real-world Large-scale Dataset for Vehicle-to-{UAV} Cooperative Perception},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4728--4737}
}
Hilbert-Geo: Solving Solid Geometric Problems by Neural-Symbolic Reasoning: Ruoran Xu,

Haoyu Cheng,

Bin Dong,

Qiufeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Ruoran and Cheng, Haoyu and Dong, Bin and Wang, Qiufeng},
    title     = {{Hilbert-Geo}: Solving Solid Geometric Problems by Neural-Symbolic Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9658--9667}
}
PosterReward: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation: Jianyu Lai,

Sixiang Chen,

Jialin Gao,

Hengyu Shi,

Zhongying Liu,

Fuxiang Zhai,

Junfeng Luo,

Xiaoming Wei,

Lujia Wang,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
    author    = {Lai, Jianyu and Chen, Sixiang and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Zhai, Fuxiang and Luo, Junfeng and Wei, Xiaoming and Wang, Lujia and Zhu, Lei},
    title     = {{PosterReward}: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7762--7772}
}
STAvatar: Soft Binding and Temporal Density Control for Monocular 3D Head Avatars Reconstruction: Jiankuo Zhao,

Xiangyu Zhu,

Zidu Wang,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Jiankuo and Zhu, Xiangyu and Wang, Zidu and Lei, Zhen},
    title     = {{STAvatar}: Soft Binding and Temporal Density Control for Monocular {3D} Head Avatars Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10996--11005}
}
PiLoT: Neural Pixel-to-3D Registration for UAV-based Ego and Target Geo-localization: Xiaoya Cheng,

Long Wang,

Yan Liu,

Xinyi Liu,

Hanlin Tan,

Yu Liu,

Maojun Zhang,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Xiaoya and Wang, Long and Liu, Yan and Liu, Xinyi and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen},
  title     = {{PiLoT}: Neural Pixel-to-{3D} Registration for {UAV}-based Ego and Target Geo-localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5379--5388}
}
Pano3DComposer: Feed-Forward Compositional 3D Scene Generation from Single Panoramic Image: Zidian Qiu,

Ancong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Zidian and Wu, Ancong},
  title     = {{Pano3DComposer}: Feed-Forward Compositional {3D} Scene Generation from Single Panoramic Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5902--5911}
}
MGDHand: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential 3D Hand Pose Estimation from Sparse IMUs: Xinyi Wang,

Pengfei Ren,

Haoyang Zhang,

Hanling Zhan,

Yingxi Li,

Liang Xie,

Yue Gao,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinyi and Ren, Pengfei and Zhang, Haoyang and Zhan, Hanling and Li, Yingxi and Xie, Liang and Gao, Yue and Yin, Erwei},
  title     = {{MGDHand}: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential {3D} Hand Pose Estimation from Sparse {IMUs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13996--14005}
}
Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis: Jianzhe Gao,

Churan Wang,

Weiyi Zhang,

Jianghua Li,

Li-An Li,

Wenguan Wang,

Yixin Zhu,

Yizhou Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jianzhe and Wang, Churan and Zhang, Weiyi and Li, Jianghua and Li, Li-An and Wang, Wenguan and Zhu, Yixin and Wang, Yizhou},
  title     = {Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7014--7025}
}
Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers: Xinyu Peng,

Han Li,

Yuyang Huang,

Ziyang Zheng,

Yaoming Wang,

Xin Chen,

Wenrui Dai,

Chenglin Li,

Junni Zou,

Hongkai Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Xinyu and Li, Han and Huang, Yuyang and Zheng, Ziyang and Wang, Yaoming and Chen, Xin and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11448--11458}
}
WildCap: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering: Yuxuan Han,

Xin Ming,

Tianxiao Li,

Zhuofan Shen,

Qixuan Zhang,

Lan Xu,

Feng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Yuxuan and Ming, Xin and Li, Tianxiao and Shen, Zhuofan and Zhang, Qixuan and Xu, Lan and Xu, Feng},
  title     = {{WildCap}: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10909--10920}
}
SemanticVLA: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning: Fei Ni,

Zhuo Chen,

Yifu Yuan,

Zibin Dong,

Xianze Yao,

Shan Luo,

Jianye Hao,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Fei and Chen, Zhuo and Yuan, Yifu and Dong, Zibin and Yao, Xianze and Luo, Shan and Hao, Jianye and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {{SemanticVLA}: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12237--12247}
}
Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video: Yuting Tan,

Xilong Cheng,

Yunxiao Qin,

Zhengnan Li,

Jingjing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Yuting and Cheng, Xilong and Qin, Yunxiao and Li, Zhengnan and Zhang, Jingjing},
  title     = {Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10545--10555}
}
Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder: Tianyu Zhang,

Dong Liu,

Chang Wen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tianyu and Liu, Dong and Chen, Chang Wen},
  title     = {Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12118--12128}
}
T2SGrid: Temporal-to-Spatial Gridification for Video Temporal Grounding: Chaohong Guo,

Yihan He,

Yongwei Nie,

Fei Ma,

Xuemiao Xu,

Chengjiang Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Chaohong and He, Yihan and Nie, Yongwei and Ma, Fei and Xu, Xuemiao and Long, Chengjiang},
  title     = {{T2SGrid}: Temporal-to-Spatial Gridification for Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3443--3454}
}
OVI-MAP: Open-Vocabulary Instance-Semantic Mapping: Zilong Deng,

Federico Tombari,

Marc Pollefeys,

Johanna Wald,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Zilong and Tombari, Federico and Pollefeys, Marc and Wald, Johanna and Barath, Daniel},
  title     = {{OVI-MAP}: Open-Vocabulary Instance-Semantic Mapping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12606--12616}
}
GDFA: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment: Xiuting Weng,

Ruizhi Pu,

Yuanhang Yao,

Kun Yue,

Zhiwen Tang,

Lixing Yu; [pdf] [supp]
[bibtex]
@InProceedings{Weng_2026_CVPR,
  author    = {Weng, Xiuting and Pu, Ruizhi and Yao, Yuanhang and Yue, Kun and Tang, Zhiwen and Yu, Lixing},
  title     = {{GDFA}: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10346--10356}
}
What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models: Yingqi Fan,

Junlong Tong,

Anhao Zhao,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Yingqi and Tong, Junlong and Zhao, Anhao and Shen, Xiaoyu},
  title     = {What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11987--11997}
}
All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark: Junjiang Wu,

Liejun Wang,

Zhiqing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Junjiang and Wang, Liejun and Guo, Zhiqing},
  title     = {All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14106--14115}
}
Coordinate Denoising for Non-Equilibrium Molecular Representation Learning: Qianwei Tang,

Baile Xu,

Jian Zhao,

Furao Shen; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Qianwei and Xu, Baile and Zhao, Jian and Shen, Furao},
  title     = {Coordinate Denoising for Non-Equilibrium Molecular Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3584--3593}
}
Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction: Shannan Yan,

Leqi Zheng,

Keyu Lv,

Jingchen Ni,

Hongyang Wei,

Jiajun Zhang,

Guangting Wang,

Jing LYU,

Chun Yuan,

Fengyun Rao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Shannan and Zheng, Leqi and Lv, Keyu and Ni, Jingchen and Wei, Hongyang and Zhang, Jiajun and Wang, Guangting and LYU, Jing and Yuan, Chun and Rao, Fengyun},
  title     = {Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6653--6663}
}
Can We Build Scene Graphs, Not Classify Them? FlowSG: Progressive Image-Conditioned Scene Graph Generation with Flow Matching: Xin Hu,

Ke Qin,

Wen Yin,

Yuan-Fang Li,

Ming Li,

Tao He; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Xin and Qin, Ke and Yin, Wen and Li, Yuan-Fang and Li, Ming and He, Tao},
  title     = {Can We Build Scene Graphs, Not Classify Them? {FlowSG}: Progressive Image-Conditioned Scene Graph Generation with Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10208--10218}
}
Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment: Fanqi Yu,

Matteo Tiezzi,

Tommaso Apicella,

Cigdem Beyan,

Vittorio Murino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Fanqi and Tiezzi, Matteo and Apicella, Tommaso and Beyan, Cigdem and Murino, Vittorio},
  title     = {Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6740--6749}
}
A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-LiDAR Scene Generation: Wentao Qu,

Guofeng Mei,

Yang Wu,

YongShun Gong,

Xiaoshui Huang,

Liang Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Wentao and Mei, Guofeng and Wu, Yang and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang},
  title     = {A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-{LiDAR} Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9434--9444}
}
Concept Regions Matter: Benchmarking CLIP with a New Cluster-Importance Approach: Aishwarya Agarwal,

Srikrishna Karanam,

Vineet Gandhi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agarwal_2026_CVPR,
  author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
  title     = {Concept Regions Matter: Benchmarking {CLIP} with a New Cluster-Importance Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2864--2874}
}
MotionEnhancer: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models: Yifan Xu,

Chao Zhang,

Ruifei Ma,

Fei Gao,

Zhifei Yang,

Jiaxing Qi,

Zhipeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yifan and Zhang, Chao and Ma, Ruifei and Gao, Fei and Yang, Zhifei and Qi, Jiaxing and Chen, Zhipeng},
  title     = {{MotionEnhancer}: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2778--2787}
}
OrionEdit: Bridging Reference and Source Images for Generalized Cross-Image Editing: Zeyu Jiang,

Lai Man Po,

Xuyuan Xu,

Yexin Wang,

Guoping Gong,

Haoxuan Wu,

Chenbo Yan,

Kun Li,

Yuyang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zeyu and Po, Lai Man and Xu, Xuyuan and Wang, Yexin and Gong, Guoping and Wu, Haoxuan and Yan, Chenbo and Li, Kun and Liu, Yuyang},
  title     = {{OrionEdit}: Bridging Reference and Source Images for Generalized Cross-Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9127--9138}
}
GeCo: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation: Qi Zang,

Dong Zhao,

Nan Pu,

Wenjing Li,

Zhun Zhong,

Meng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zang_2026_CVPR,
  author    = {Zang, Qi and Zhao, Dong and Pu, Nan and Li, Wenjing and Zhong, Zhun and Wang, Meng},
  title     = {{GeCo}: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {871--881}
}
AVGGT: Rethinking Global Attention for Accelerating VGGT: Xianbing Sun,

Zhikai Zhu,

Zhengyu Lou,

Bo Yang,

Jinyang Tang,

Liqing Zhang,

He Wang,

Jianfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xianbing and Zhu, Zhikai and Lou, Zhengyu and Yang, Bo and Tang, Jinyang and Zhang, Liqing and Wang, He and Zhang, Jianfu},
  title     = {{AVGGT}: Rethinking Global Attention for Accelerating {VGGT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {251--260}
}
CDICS: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling: Zhiyu Li,

Dianmo Sheng,

Qi Chu,

Shilong Chen,

Tao Gong,

Zhou Wei,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiyu and Sheng, Dianmo and Chu, Qi and Chen, Shilong and Gong, Tao and Wei, Zhou and Yu, Nenghai},
  title     = {{CDICS}: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13179--13188}
}
Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence: Qiya Song,

Yiqiang Xie,

Yuan Sun,

Renwei Dian,

Xudong Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Qiya and Xie, Yiqiang and Sun, Yuan and Dian, Renwei and Kang, Xudong},
  title     = {Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9732--9741}
}
STAR-R1: Multi-View Spatial TrAnsformation Reasoning by Reinforcing Multimodal LLMs: Zongzhao Li,

Zongyang Ma,

Mingze Li,

Songyou Li,

Yu Rong,

Tingyang Xu,

Ziqi Zhang,

Deli Zhao,

Wenbing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zongzhao and Ma, Zongyang and Li, Mingze and Li, Songyou and Rong, Yu and Xu, Tingyang and Zhang, Ziqi and Zhao, Deli and Huang, Wenbing},
  title     = {{STAR-R1}: Multi-View Spatial {TrAnsformation} Reasoning by Reinforcing Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12041--12051}
}
MRI Contrast Enhancement Kinetics World Model: Jindi Kong,

Yuting He,

Cong Xia,

Rongjun Ge,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Jindi and He, Yuting and Xia, Cong and Ge, Rongjun and Li, Shuo},
  title     = {{MRI} Contrast Enhancement Kinetics World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1288--1299}
}
FaithFusion: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain: YuAn Wang,

Xiaofan Li,

Chi Huang,

Wenhao Zhang,

Hao Li,

Bosheng Wang,

Xun Sun,

Jun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, YuAn and Li, Xiaofan and Huang, Chi and Zhang, Wenhao and Li, Hao and Wang, Bosheng and Sun, Xun and Wang, Jun},
  title     = {{FaithFusion}: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1198--1209}
}
IR-HGP: Physically-Aware Gaussian Inverse Rendering for High-Illumination Scenes via Generative Priors: Qingan Zhang,

Wensheng Li,

Chengying Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qingan and Li, Wensheng and Gao, Chengying},
  title     = {{IR-HGP}: Physically-Aware {Gaussian} Inverse Rendering for High-Illumination Scenes via Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1210--1220}
}
GuideFlow: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving: Lin Liu,

Caiyan Jia,

Guanyi Yu,

Ziying Song,

Junqiao Li,

Feiyang Jia,

Peiliang Wu,

Xiaoshuai Hao,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lin and Jia, Caiyan and Yu, Guanyi and Song, Ziying and Li, Junqiao and Jia, Feiyang and Wu, Peiliang and Hao, Xiaoshuai and Luo, Yadan},
  title     = {{GuideFlow}: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3719--3728}
}
The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation: Weijia Mao,

Hao Chen,

Zhenheng Yang,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Weijia and Chen, Hao and Yang, Zhenheng and Shou, Mike Zheng},
  title     = {The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5999--6009}
}
Event Stream Filtering via Probability Flux Estimation: Jinze Chen,

Wei Zhai,

Yang Cao,

Bin Li,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jinze and Zhai, Wei and Cao, Yang and Li, Bin and Zha, Zheng-Jun},
  title     = {Event Stream Filtering via Probability Flux Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8023--8032}
}
Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers: Ruidong Chen,

Yancheng Bai,

Xuanpu Zhang,

Jianhao Zeng,

Lanjun Wang,

Dan Song,

Lei Sun,

Xiangxiang Chu,

Anan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ruidong and Bai, Yancheng and Zhang, Xuanpu and Zeng, Jianhao and Wang, Lanjun and Song, Dan and Sun, Lei and Chu, Xiangxiang and Liu, Anan},
  title     = {Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11493--11503}
}
Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation: Ke Fan,

Jiangning Zhang,

Ran Yi,

Jingyu Gong,

Yabiao Wang,

Yating Wang,

Xin Tan,

Chengjie Wang,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Ke and Zhang, Jiangning and Yi, Ran and Gong, Jingyu and Wang, Yabiao and Wang, Yating and Tan, Xin and Wang, Chengjie and Ma, Lizhuang},
  title     = {Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9330--9341}
}
Pantheon360: Taming Digital Twin Generation via 3D-Aware 360deg Video Diffusion: Ting-Hsuan Chen,

Ying-Huan Chen,

Tao Tu,

Jie-Ying Lee,

Cho-Ying Wu,

Fangzhou Lin,

Hengyuan Zhang,

David Paz,

Xinyu Huang,

Yuliang Guo,

Yu-Lun Liu,

Yue Wang,

Liu Ren; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ting-Hsuan and Chen, Ying-Huan and Tu, Tao and Lee, Jie-Ying and Wu, Cho-Ying and Lin, Fangzhou and Zhang, Hengyuan and Paz, David and Huang, Xinyu and Guo, Yuliang and Liu, Yu-Lun and Wang, Yue and Ren, Liu},
  title     = {{Pantheon360}: Taming Digital Twin Generation via {3D}-Aware 360deg Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11138--11149}
}
Correspondence-Attention Alignment for Multi-View Diffusion Models: Minkyung Kwon,

Jinhyeok Choi,

Jiho Park,

Seonghu Jeon,

Jinhyuk Jang,

Junyoung Seo,

Minseop Kwak,

Jin-Hwa Kim,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Minkyung and Choi, Jinhyeok and Park, Jiho and Jeon, Seonghu and Jang, Jinhyuk and Seo, Junyoung and Kwak, Minseop and Kim, Jin-Hwa and Kim, Seungryong},
  title     = {Correspondence-Attention Alignment for Multi-View Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2316--2326}
}
CIGMA: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers: Maisha Maliha,

Dean F. Hougen; [pdf] [supp]
[bibtex]
@InProceedings{Maliha_2026_CVPR,
  author    = {Maliha, Maisha and Hougen, Dean F.},
  title     = {{CIGMA}: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9891--9900}
}
Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data: Yujuan Zhang,

Qing Li,

Ziyu Li,

Xiuxing Li,

Zhuo Wang,

Mengrui Xu,

Xia Wu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yujuan and Li, Qing and Li, Ziyu and Li, Xiuxing and Wang, Zhuo and Xu, Mengrui and Wu, Xia},
  title     = {Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1787--1797}
}
Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment: Yifan Li,

Haofeng Huang,

Wenhan Yang,

Jiaying Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yifan and Huang, Haofeng and Yang, Wenhan and Liu, Jiaying},
  title     = {Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1386--1395}
}
Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis: Chunlei Meng,

Jiabin Luo,

Zhenglin Yan,

Zhenyu Yu,

Rong Fu,

Zhongxue Gan,

Chun Ouyang; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Chunlei and Luo, Jiabin and Yan, Zhenglin and Yu, Zhenyu and Fu, Rong and Gan, Zhongxue and Ouyang, Chun},
  title     = {Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8791--8800}
}
Event-based Visual Deformation Measurement: Yuliang Wu,

Wei Zhai,

Yuxin Cui,

Tiesong Zhao,

Yang Cao,

Zheng-Jun Zha; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yuliang and Zhai, Wei and Cui, Yuxin and Zhao, Tiesong and Cao, Yang and Zha, Zheng-Jun},
  title     = {Event-based Visual Deformation Measurement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {903--913}
}
GeoDexGrasp: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping: Bing Han,

Weiyuan Liu,

Changlong Zhang,

Chenxi Wang,

Zhibin Zhao,

Zhi Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Bing and Liu, Weiyuan and Zhang, Changlong and Wang, Chenxi and Zhao, Zhibin and Zhai, Zhi},
  title     = {{GeoDexGrasp}: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6729--6739}
}
Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching: Deyu Bo,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Deyu and Wang, Xinchao},
  title     = {Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5596--5605}
}
Fast-ThinkAct: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning: Chi-Pin Huang,

Yunze Man,

Zhiding Yu,

Min-Hung Chen,

Jan Kautz,

Yu-Chiang Frank Wang,

Fu-En Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Chi-Pin and Man, Yunze and Yu, Zhiding and Chen, Min-Hung and Kautz, Jan and Wang, Yu-Chiang Frank and Yang, Fu-En},
  title     = {{Fast-ThinkAct}: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5070--5081}
}
Geometry-Guided 3D Visual Token Pruning for Video-Language Models: Han Li,

Zehao Huang,

Jiahui Fu,

Naiyan Wang,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Han and Huang, Zehao and Fu, Jiahui and Wang, Naiyan and Liu, Si},
  title     = {Geometry-Guided {3D} Visual Token Pruning for Video-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9615--9625}
}
DyFCLT: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection: Chaolang Li,

Pengwen Dai,

Jingyu Li,

Siyuan Yao,

Yuchen Jiang,

Zhuoran Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chaolang and Dai, Pengwen and Li, Jingyu and Yao, Siyuan and Jiang, Yuchen and Zheng, Zhuoran},
  title     = {{DyFCLT}: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11313--11323}
}
Residual Primitive Fitting of 3D Shapes with SuperFrusta: Aditya Ganeshan,

Matheus Gadelha,

Thibault Groueix,

Zhiqin Chen,

Siddhartha Chaudhuri,

Vladimir Kim,

Wang Yifan,

Daniel Ritchie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganeshan_2026_CVPR,
  author    = {Ganeshan, Aditya and Gadelha, Matheus and Groueix, Thibault and Chen, Zhiqin and Chaudhuri, Siddhartha and Kim, Vladimir and Yifan, Wang and Ritchie, Daniel},
  title     = {Residual Primitive Fitting of {3D} Shapes with {SuperFrusta}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7404--7413}
}
SAM 3D: 3Dfy Anything in Images: Xingyu Chen,

FU-JEN CHU,

Pierre Gleize,

Kevin J Liang,

Alexander Sax,

Hao Tang,

Weiyao Wang,

Michelle Guo,

Thibaut Hardin,

Xiang Li,

Aohan Lin,

Jia-Wei Liu,

Ziqi Ma,

Anushka Sagar,

Bowen Song,

Xiaodong Wang,

Jianing Yang,

Bowen Zhang,

Piotr Dollár,

Georgia Gkioxari,

Matt Feiszli,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xingyu and CHU, FU-JEN and Gleize, Pierre and Liang, Kevin J and Sax, Alexander and Tang, Hao and Wang, Weiyao and Guo, Michelle and Hardin, Thibaut and Li, Xiang and Lin, Aohan and Liu, Jia-Wei and Ma, Ziqi and Sagar, Anushka and Song, Bowen and Wang, Xiaodong and Yang, Jianing and Zhang, Bowen and Doll{\'a}r, Piotr and Gkioxari, Georgia and Feiszli, Matt and Malik, Jitendra},
  title     = {{SAM} {3D}: {3Dfy} Anything in Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7220--7232}
}
RNED: Rotary Number Encoding and Decoding for Medical VLMs: Fengbei Liu,

Sunwoo Kwak,

Nusrat Nizam,

Ilan Richter,

Ashley Beecy,

Jayant Raikhelkar,

Deborah Estrin,

Mert R. Sabuncu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Fengbei and Kwak, Sunwoo and Nizam, Nusrat and Richter, Ilan and Beecy, Ashley and Raikhelkar, Jayant and Estrin, Deborah and Sabuncu, Mert R.},
  title     = {{RNED}: Rotary Number Encoding and Decoding for Medical {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13722--13731}
}
CogniEdit: Dense Gradient Flow Optimization for Fine-Grained Image Editing: Yan Li,

Lin Liu,

Xiaopeng Zhang,

Wei Xue,

Wenhan Luo,

Yike Guo,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Liu, Lin and Zhang, Xiaopeng and Xue, Wei and Luo, Wenhan and Guo, Yike and Tian, Qi},
  title     = {{CogniEdit}: Dense Gradient Flow Optimization for Fine-Grained Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1145--1154}
}
BiOTPrompt: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation: Tengfei Liu,

Yijian Fan,

Boyue Wang,

Yongli Hu,

Mingjie Li,

Jinghua Li,

Junbin Gao,

Xiaojun Chang,

Zhihui Li,

Baocai Yin; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tengfei and Fan, Yijian and Wang, Boyue and Hu, Yongli and Li, Mingjie and Li, Jinghua and Gao, Junbin and Chang, Xiaojun and Li, Zhihui and Yin, Baocai},
  title     = {{BiOTPrompt}: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13755--13765}
}
Anti-Degradation Lifelong Multi-View Clustering: Xingfeng Li,

Hao Pan,

Honglin Yuan,

Yuan Sun,

Xujian Zhao,

Jiaqi Lin,

Zhenwen Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xingfeng and Pan, Hao and Yuan, Honglin and Sun, Yuan and Zhao, Xujian and Lin, Jiaqi and Ren, Zhenwen},
  title     = {Anti-Degradation Lifelong Multi-View Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8749--8759}
}
FedSDR: Federated Graph Learning with Structural Noise Detection and Reconstruction: Jiaqi Liu,

Zihan Tan,

Guancheng Wan,

Wenke Huang,

He Li,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jiaqi and Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Ye, Mang},
    title     = {{FedSDR}: Federated Graph Learning with Structural Noise Detection and Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3379--3389}
}
ReScene4D: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor 3D Scenes: Emily Steiner,

Jianhao Zheng,

Henry Howard-Jenkins,

Chris Xie,

Iro Armeni; [pdf] [supp]
[bibtex]
@InProceedings{Steiner_2026_CVPR,
    author    = {Steiner, Emily and Zheng, Jianhao and Howard-Jenkins, Henry and Xie, Chris and Armeni, Iro},
    title     = {{ReScene4D}: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor {3D} Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10710--10720}
}
Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model: Yuan Wang,

Borui Liao,

Huijuan Huang,

Jinda Lu,

Ouxiang Li,

Kuien Liu,

Meng Wang,

Xiang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yuan and Liao, Borui and Huang, Huijuan and Lu, Jinda and Li, Ouxiang and Liu, Kuien and Wang, Meng and Wang, Xiang},
    title     = {Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4526--4536}
}
Lipschitz Optimization for Formal Verification of Homographies: Jean-Guillaume Durand,

Panagiotis Kouvaros,

Maxime Gariel,

Alessio Lomuscio; [pdf] [supp]
[bibtex]
@InProceedings{Durand_2026_CVPR,
    author    = {Durand, Jean-Guillaume and Kouvaros, Panagiotis and Gariel, Maxime and Lomuscio, Alessio},
    title     = {{Lipschitz} Optimization for Formal Verification of Homographies},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13306--13315}
}
Glove2Hand: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves: Xinyu Zhang,

Ziyi Kou,

Chuan Qin,

Mia Huang,

Ergys Ristani,

Ankit Kumar,

Lele Chen,

Kun He,

Abdeslam Boularias,

Li Guan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xinyu and Kou, Ziyi and Qin, Chuan and Huang, Mia and Ristani, Ergys and Kumar, Ankit and Chen, Lele and He, Kun and Boularias, Abdeslam and Guan, Li},
    title     = {{Glove2Hand}: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1829--1840}
}
HiFICL: High-Fidelity In-Context Learning for Multimodal Tasks: Xiaoyu Li,

Yuhang Liu,

Xuanshuo Kang,

Zheng Luo,

Fangqi Lou,

Xiaohua Wu,

Zihan Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xiaoyu and Liu, Yuhang and Kang, Xuanshuo and Luo, Zheng and Lou, Fangqi and Wu, Xiaohua and Xiong, Zihan},
    title     = {{HiFICL}: High-Fidelity In-Context Learning for Multimodal Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3069--3078}
}
Ar2Can: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation: Shubhankar Borse,

Phuc Pham,

Farzad Farhadzadeh,

Seokeon Choi,

Phong Nguyen,

Anh Tran,

Sungrack Yun,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Borse_2026_CVPR,
    author    = {Borse, Shubhankar and Pham, Phuc and Farhadzadeh, Farzad and Choi, Seokeon and Nguyen, Phong and Tran, Anh and Yun, Sungrack and Hayat, Munawar and Porikli, Fatih},
    title     = {{Ar2Can}: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {550--560}
}
DICArt: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces: Li Zhang,

Mingyu Mei,

Ailing Wang,

Xianhui Meng,

Yan Zhong,

Xinyuan Song,

Liu Liu,

Rujing Wang,

Zaixing He,

Cewu Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Li and Mei, Mingyu and Wang, Ailing and Meng, Xianhui and Zhong, Yan and Song, Xinyuan and Liu, Liu and Wang, Rujing and He, Zaixing and Lu, Cewu},
    title     = {{DICArt}: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4687--4697}
}
Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling: Yuran Wang,

Bohan Zeng,

Chengzhuo Tong,

Wenxuan Liu,

Yang Shi,

Xiaochen Ma,

Hao Liang,

Yuanxing Zhang,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yuran and Zeng, Bohan and Tong, Chengzhuo and Liu, Wenxuan and Shi, Yang and Ma, Xiaochen and Liang, Hao and Zhang, Yuanxing and Zhang, Wentao},
    title     = {{Scone}: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7773--7783}
}
AdaptVision: Efficient Vision-Language Models via Adaptive Visual Acquisition: Zichuan Lin,

Yicheng Liu,

Yang Yang,

Lvfang Tao,

Deheng Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Zichuan and Liu, Yicheng and Yang, Yang and Tao, Lvfang and Ye, Deheng},
    title     = {{AdaptVision}: Efficient Vision-Language Models via Adaptive Visual Acquisition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11923--11932}
}
GaussianZoom: Progressive Zoom-in Generative 3D Gaussian Splatting with Geometric and Semantic Guidance: Jiale Shi,

Jiarui Hu,

Zesong Yang,

Kaixuan Luan,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Jiale and Hu, Jiarui and Yang, Zesong and Luan, Kaixuan and Bao, Hujun and Cui, Zhaopeng},
    title     = {{GaussianZoom}: Progressive Zoom-in Generative {3D} {Gaussian} Splatting with Geometric and Semantic Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11850--11859}
}
An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving: Yi Feng,

Junwu E,

Zizhan Guo,

Yu Ma,

Hanli Wang,

Rui Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Yi and E, Junwu and Guo, Zizhan and Ma, Yu and Wang, Hanli and Fan, Rui},
    title     = {An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14219--14228}
}
Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning: Yizheng Gong,

Siyue Yu,

Waleed Al-Nuaimy,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
    author    = {Gong, Yizheng and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin},
    title     = {Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10830--10839}
}
Reinforcing Video Object Segmentation to Think before it Segments: Sitong Gong,

Yunzhi Zhuge,

Lu Zhang,

Jiazuo Yu,

Pingping Zhang,

Xu Jia,

Huchuan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
    author    = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yu, Jiazuo and Zhang, Pingping and Jia, Xu and Lu, Huchuan},
    title     = {Reinforcing Video Object Segmentation to Think before it Segments},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3835--3844}
}
GazeOnce360: Fisheye-Based 360° Multi-Person Gaze Estimation with Global-Local Feature Fusion: Zhuojiang Cai,

Zhenghui Sun,

Feng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Zhuojiang and Sun, Zhenghui and Lu, Feng},
    title     = {{GazeOnce360}: Fisheye-Based {360$^\circ$} Multi-Person Gaze Estimation with Global-Local Feature Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12312--12321}
}
Scene-Centric Unsupervised Video Panoptic Segmentation: Christoph Reich,

Oliver Hahn,

Nikita Araslanov,

Laura Leal-Taixé,

Christian Rupprecht,

Daniel Cremers,

Stefan Roth; [pdf] [supp]
[bibtex]
@InProceedings{Reich_2026_CVPR,
    author    = {Reich, Christoph and Hahn, Oliver and Araslanov, Nikita and Leal-Taix{\'e}, Laura and Rupprecht, Christian and Cremers, Daniel and Roth, Stefan},
    title     = {Scene-Centric Unsupervised Video Panoptic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10753--10765}
}
Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank: Chenxu Wang,

Kai Zhang,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Chenxu and Zhang, Kai and Yang, Jian},
    title     = {Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1277--1287}
}
MotionHiFlow: Text-to-Motion via Hierarchical Flow Matching: Heng Li,

Xiaotong Lin,

Ling-An Zeng,

Yulei Kang,

Shuai Li,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Heng and Lin, Xiaotong and Zeng, Ling-An and Kang, Yulei and Li, Shuai and Hu, Jian-Fang},
    title     = {{MotionHiFlow}: Text-to-Motion via Hierarchical Flow Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9352--9363}
}
What Are You Doing? A Closer Look at Controllable Human Video Generation: Emanuele Bugliarello,

Anurag Arnab,

Roni Paiss,

Christy Koh,

Pieter-Jan Kindermans,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bugliarello_2026_CVPR,
    author    = {Bugliarello, Emanuele and Arnab, Anurag and Paiss, Roni and Koh, Christy and Kindermans, Pieter-Jan and Schmid, Cordelia},
    title     = {What Are You Doing? {A} Closer Look at Controllable Human Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11414--11425}
}
Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion: Jinsong Zhang,

Ying Qu,

Yuan Liao,

Hairong Qi,

Zhenzhou Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jinsong and Qu, Ying and Liao, Yuan and Qi, Hairong and Shao, Zhenzhou},
    title     = {Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12344--12353}
}
LoD-Loc v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment: Shuaibang Peng,

Juelin Zhu,

Xia Li,

Kun Yang,

Yu Liu,

Maojun Zhang,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Shuaibang and Zhu, Juelin and Li, Xia and Yang, Kun and Liu, Yu and Zhang, Maojun and Yan, Shen},
    title     = {{LoD-Loc} v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12193--12205}
}
Flow3r: Factored Flow Prediction for Scalable Visual Geometry Learning: Zhongxiao Cong,

Qitao Zhao,

Minsik Jeon,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2026_CVPR,
    author    = {Cong, Zhongxiao and Zhao, Qitao and Jeon, Minsik and Tulsiani, Shubham},
    title     = {{Flow3r}: Factored Flow Prediction for Scalable Visual Geometry Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {438--447}
}
Face2Scene: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration: Amirhossein Kazerouni,

Maitreya Suin,

Tristan Aumentado-Armstrong,

Sina Honari,

Amanpreet Walia,

Iqbal Mohomed,

Konstantinos G. Derpanis,

Babak Taati,

Alex Levinshtein; [pdf] [supp]
[bibtex]
@InProceedings{Kazerouni_2026_CVPR,
    author    = {Kazerouni, Amirhossein and Suin, Maitreya and Aumentado-Armstrong, Tristan and Honari, Sina and Walia, Amanpreet and Mohomed, Iqbal and Derpanis, Konstantinos G. and Taati, Babak and Levinshtein, Alex},
    title     = {{Face2Scene}: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8428--8438}
}
Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving: Jianhua Han,

Meng Tian,

Jiangtong Zhu,

Fan He,

Huixin Zhang,

Sitong Guo,

Dechang Zhu,

Hao Tang,

Pei Xu,

Yuze Guo,

Minzhe Niu,

Haojie Zhu,

Qichao Dong,

Xuechao Yan,

Siyuan Dong,

Lu Hou,

Qingqiu Huang,

Xiaosong Jia,

Hang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Jianhua and Tian, Meng and Zhu, Jiangtong and He, Fan and Zhang, Huixin and Guo, Sitong and Zhu, Dechang and Tang, Hao and Xu, Pei and Guo, Yuze and Niu, Minzhe and Zhu, Haojie and Dong, Qichao and Yan, Xuechao and Dong, Siyuan and Hou, Lu and Huang, Qingqiu and Jia, Xiaosong and Xu, Hang},
    title     = {{Percept-WAM}: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10642--10655}
}
Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control: Zhuoli Zhuang,

Yu-Cheng Chang,

Yu-Kai Wang,

Thomas Do,

Chin-Teng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
    author    = {Zhuang, Zhuoli and Chang, Yu-Cheng and Wang, Yu-Kai and Do, Thomas and Lin, Chin-Teng},
    title     = {Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10599--10609}
}
EvoGraph-R1: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval: Jiashi Lin,

Changhong Jiang,

Xiangru Lin,

Ruifei Zhang,

Xinyi Zhu,

Jiyao Liu,

Cheng Tang,

Ye Du,

Shujian Gao,

Junzhi Ning,

Lihao Liu,

Ziyan Huang,

Tianbin Li,

Jin Ye,

Junjun He; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Jiashi and Jiang, Changhong and Lin, Xiangru and Zhang, Ruifei and Zhu, Xinyi and Liu, Jiyao and Tang, Cheng and Du, Ye and Gao, Shujian and Ning, Junzhi and Liu, Lihao and Huang, Ziyan and Li, Tianbin and Ye, Jin and He, Junjun},
    title     = {{EvoGraph-R1}: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {756--765}
}
FHAvatar: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable 3D Head Avatar from Few Casual Captures: Yujie Sun,

Zhuoqiang Cai,

Chaoyue Niu,

Jianchuan Chen,

Zhiwen Chen,

Chengfei Lv,

Fan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Yujie and Cai, Zhuoqiang and Niu, Chaoyue and Chen, Jianchuan and Chen, Zhiwen and Lv, Chengfei and Wu, Fan},
    title     = {{FHAvatar}: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable {3D} Head Avatar from Few Casual Captures},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4132--4144}
}
Edge-RecViT: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion: YiZhou Li,

Jinyi Xu,

Mingyu Yin,

Xianyi Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, YiZhou and Xu, Jinyi and Yin, Mingyu and Zhao, Xianyi},
    title     = {{Edge-RecViT}: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12987--12996}
}
TVHighlights: LLM-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and TV Dramas: Qi Qiu,

Xuan Wu,

Jiawei Peng,

Yuan Miao,

Xu Yang,

Yanlong Du; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
    author    = {Qiu, Qi and Wu, Xuan and Peng, Jiawei and Miao, Yuan and Yang, Xu and Du, Yanlong},
    title     = {{TVHighlights}: {LLM-Guided} Human-Free Collaborative Training for Video Highlight Detection in Movies and {TV} Dramas},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9773--9783}
}
BiPA: Bilevel Prompt Adaptation for Underwater Instance Segmentation: Long Ma,

Haoze Zheng,

Yuhang Mao,

Jinyuan Liu,

Chengpei Xu,

Xinwei Xue,

Yi Wang,

Xiangjian He,

Weimin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Long and Zheng, Haoze and Mao, Yuhang and Liu, Jinyuan and Xu, Chengpei and Xue, Xinwei and Wang, Yi and He, Xiangjian and Wang, Weimin},
    title     = {{BiPA}: Bilevel Prompt Adaptation for Underwater Instance Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10731--10740}
}
Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention: Yanbo Mao,

Jianlong Fu,

Ruoxuan Zhang,

Hongxia Xie,

Meibao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Yanbo and Fu, Jianlong and Zhang, Ruoxuan and Xie, Hongxia and Yao, Meibao},
    title     = {Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13508--13518}
}
GThinker: Towards General Multimodal Reasoning via Cue-Guided Rethinking: Yufei Zhan,

Ziheng Wu,

Yousong Zhu,

Rongkun Xue,

Guanghao Zhou,

Ruipu Luo,

Zhenghao Chen,

Can Zhang,

Yifan Li,

Zhentao He,

Zheming Yang,

Ming Tang,

Minghui Qiu,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
    author    = {Zhan, Yufei and Wu, Ziheng and Zhu, Yousong and Xue, Rongkun and Zhou, Guanghao and Luo, Ruipu and Chen, Zhenghao and Zhang, Can and Li, Yifan and He, Zhentao and Yang, Zheming and Tang, Ming and Qiu, Minghui and Wang, Jinqiao},
    title     = {{GThinker}: Towards General Multimodal Reasoning via Cue-Guided Rethinking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11954--11965}
}
Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts: Jiude Wei,

Yuxuan Li,

Cewu Lu,

Jianhua Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Jiude and Li, Yuxuan and Lu, Cewu and Sun, Jianhua},
    title     = {Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13519--13528}
}
CADFS: A Big CAD Program Dataset and Framework for Computer-Aided Design with Large Language Models: Vladislav Pyatov,

Gleb Bobrovskikh,

Saveliy Galochkin,

Nikita Boldyrev,

Oleg Voynov,

Alexander Filippov,

Gonzalo Ferrer,

Peter Wonka,

Evgeny Burnaev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pyatov_2026_CVPR,
    author    = {Pyatov, Vladislav and Bobrovskikh, Gleb and Galochkin, Saveliy and Boldyrev, Nikita and Voynov, Oleg and Filippov, Alexander and Ferrer, Gonzalo and Wonka, Peter and Burnaev, Evgeny},
    title     = {{CADFS}: A Big {CAD} Program Dataset and Framework for Computer-Aided Design with Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10176--10186}
}
Perceptual Neural Video Compression with Color Separation and Rank Chain: Xiongzhuang Liang,

Chuanbo Tang,

Zhuoyuan Li,

Li Li,

Dong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Xiongzhuang and Tang, Chuanbo and Li, Zhuoyuan and Li, Li and Liu, Dong},
    title     = {Perceptual Neural Video Compression with Color Separation and Rank Chain},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5348--5358}
}
BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation: Rachit Saluja,

Asli Cihangir,

Ruining Deng,

Johannes C. Paetzold,

Fengbei Liu,

Mert R. Sabuncu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saluja_2026_CVPR,
    author    = {Saluja, Rachit and Cihangir, Asli and Deng, Ruining and Paetzold, Johannes C. and Liu, Fengbei and Sabuncu, Mert R.},
    title     = {{BackSplit}: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8492--8502}
}
FlashDecoder: Real-Time Latent-to-Pixel Streaming Decoder with Transformers: Minguk Kang,

Suha Kwak; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2026_CVPR,
    author    = {Kang, Minguk and Kwak, Suha},
    title     = {{FlashDecoder}: Real-Time Latent-to-Pixel Streaming Decoder with Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5294--5305}
}
SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction: Lingxiao Li,

Dongwon Kim,

Lingyan Ruan,

Bin Chen,

Taesoo Kwon,

Taehyun Rhee; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Lingxiao and Kim, Dongwon and Ruan, Lingyan and Chen, Bin and Kwon, Taesoo and Rhee, Taehyun},
    title     = {{SyncMos}: Scalable Motion Synchronisation for Multi-Agent Scene Interaction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8174--8182}
}
UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions: Wenbin Tan,

Jiawen Lin,

Yuan Xie,

Yachao Zhang,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Wenbin and Lin, Jiawen and Xie, Yuan and Zhang, Yachao and Qu, Yanyun},
    title     = {{UZ3DVG}: Unaided Zero-Shot {3D} Visual Grounding with Generated Language Conditions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9547--9557}
}
CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization: Xindong Mao,

Hang Li,

Yuchen Wu,

Jiahe Li,

Xiao Bai,

Jin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Xindong and Li, Hang and Wu, Yuchen and Li, Jiahe and Bai, Xiao and Zheng, Jin},
    title     = {{CoLoR}: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12206--12216}
}
$\alpha$Matte4K & $\mu$Matting: Dataset and Model for Ultra-Micro Precision Alpha Video Matting: Xinyi Chen,

Hang Dong,

Baowei Jiang,

Shenkun Xu,

Youqi Guan,

Kanle Shi,

Kun Gai,

Haichuan Song; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xinyi and Dong, Hang and Jiang, Baowei and Xu, Shenkun and Guan, Youqi and Shi, Kanle and Gai, Kun and Song, Haichuan},
    title     = {{$\alpha$Matte4K} \& {$\mu$Matting}: Dataset and Model for Ultra-Micro Precision Alpha Video Matting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12491--12500}
}
Your One-Stop Solution for AI-Generated Video Detection: Long Ma,

Zihao Xue,

Yan Wang,

Zhiyuan Yan,

Jin Xu,

Xiaorui Jiang,

Haiyang Yu,

Yong Liao,

Zhen Bi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Long and Xue, Zihao and Wang, Yan and Yan, Zhiyuan and Xu, Jin and Jiang, Xiaorui and Yu, Haiyang and Liao, Yong and Bi, Zhen},
    title     = {Your One-Stop Solution for {AI-Generated} Video Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4458--4470}
}
Breaking Multimodal LLM Safety via Video-Driven Prompting: Dong Wang,

Xiangyu He,

Xinqi Lyu,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Dong and He, Xiangyu and Lyu, Xinqi and Xiao, Bin},
    title     = {Breaking Multimodal {LLM} Safety via Video-Driven Prompting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8566--8576}
}
Common Inpainted Objects In-N-Out of Context: Tianze Yang,

Tyson Jordan,

Ruitong Sun,

Ninghao Liu,

Jin Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Tianze and Jordan, Tyson and Sun, Ruitong and Liu, Ninghao and Sun, Jin},
    title     = {Common Inpainted Objects {In-N-Out} of Context},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13069--13079}
}
U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences: Xiang Xu,

Alan Liang,

Youquan Liu,

Linfeng Li,

Lingdong Kong,

Ziwei Liu,

Qingshan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Xiang and Liang, Alan and Liu, Youquan and Li, Linfeng and Kong, Lingdong and Liu, Ziwei and Liu, Qingshan},
    title     = {{U4D}: Uncertainty-Aware {4D} World Modeling from {LiDAR} Sequences},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10027--10039}
}
Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models: Xuzeng Li,

Tao Zhang,

Xiangyun Tang,

Jiacheng Wang,

Jian Wang,

Jiawen Kang,

Jiqiang Liu,

Zhen Han,

Dusit Niyato,

Dong In Kim; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xuzeng and Zhang, Tao and Tang, Xiangyun and Wang, Jiacheng and Wang, Jian and Kang, Jiawen and Liu, Jiqiang and Han, Zhen and Niyato, Dusit and Kim, Dong In},
    title     = {Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13336--13345}
}
IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework: Feiyu Wang,

Jiayuan Yang,

Zhiyuan Zhao,

Da Zhang,

Bingyu Li,

Peng Liu,

Junyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Feiyu and Yang, Jiayuan and Zhao, Zhiyuan and Zhang, Da and Li, Bingyu and Liu, Peng and Gao, Junyu},
    title     = {{IntroSVG}: Learning from Rendering Feedback for {Text-to-SVG} Generation via an Introspective Generator-Critic Framework},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {615--625}
}
BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting: Renbo Tu,

Ali SaraerToosi,

Nicholas S. Conroy,

Gennady Pekhimenko,

Aviad Levis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2026_CVPR,
    author    = {Tu, Renbo and SaraerToosi, Ali and Conroy, Nicholas S. and Pekhimenko, Gennady and Levis, Aviad},
    title     = {{BHCast}: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5606--5616}
}
BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers: Chaodong Xiao,

Zhengqiang Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
    author    = {Xiao, Chaodong and Zhang, Zhengqiang and Zhang, Lei},
    title     = {{BinaryAttention}: One-Bit {QK-Attention} for Vision and Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12106--12117}
}
FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement: Wenshuo Gao,

Junyi Fan,

Jiangyue Zeng,

Shuai Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Wenshuo and Fan, Junyi and Zeng, Jiangyue and Yang, Shuai},
    title     = {{FlowPortal}: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {2025--2034}
}
ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding: Ao Cheng,

Xingming Li,

Xuanyu Ji,

Xixiang He,

Qiyao Sun,

Chunping Qiu,

Runke Huang,

Qingyong Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Ao and Li, Xingming and Ji, Xuanyu and He, Xixiang and Sun, Qiyao and Qiu, Chunping and Huang, Runke and Hu, Qingyong},
    title     = {{ENC-Bench}: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {2423--2433}
}
BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning: Yuhan Xie,

Chen Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Yuhan and Lyu, Chen},
    title     = {{BD-Merging}: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12892--12901}
}
One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers: Moayed Haji-Ali,

Willi Menapace,

Ivan Skorokhodov,

Dogyun Park,

Anil Kag,

Michael Vasilkovsky,

Sergey Tulyakov,

Vicente Ordonez,

Aliaksandr Siarohin; [pdf] [supp]
[bibtex]
@InProceedings{Haji-Ali_2026_CVPR,
  author    = {Haji-Ali, Moayed and Menapace, Willi and Skorokhodov, Ivan and Park, Dogyun and Kag, Anil and Vasilkovsky, Michael and Tulyakov, Sergey and Ordonez, Vicente and Siarohin, Aliaksandr},
  title     = {One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4558--4568},
}
OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text: Weiguo Pian,

Saksham Singh Kushwaha,

Zhimin Chen,

Shijian Deng,

Kai Wang,

Yunhui Guo,

Yapeng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pian_2026_CVPR,
  author    = {Pian, Weiguo and Kushwaha, Saksham Singh and Chen, Zhimin and Deng, Shijian and Wang, Kai and Guo, Yunhui and Tian, Yapeng},
  title     = {{OmniSonic}: Towards Universal and Holistic Audio Generation from Video and Text},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {540--549},
}
SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment: Tianle Shen,

Fang Yan,

Xiaofan Zhang; [pdf]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Tianle and Yan, Fang and Zhang, Xiaofan},
  title     = {{SAR2Net}: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12544--12553},
}
ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation: Zhenyang Liu,

Yongchong Gu,

Yikai Wang,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhenyang and Gu, Yongchong and Wang, Yikai and Xue, Xiangyang and Fu, Yanwei},
  title     = {{ActiveVLA}: Injecting Active Perception into Vision-Language-Action Models for Precise {3D} Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8141--8151},
}
MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters: Soomin Park,

Eunseong Lee,

Kwang Bin Lee,

Sung-Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Soomin and Lee, Eunseong and Lee, Kwang Bin and Lee, Sung-Hee},
  title     = {{MaskAdapt}: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2285--2294},
}
SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation: Yu Yuan,

Tharindu Wickremasinghe,

Zeeshan Nadir,

Xijun Wang,

Yiheng Chi,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Yu and Wickremasinghe, Tharindu and Nadir, Zeeshan and Wang, Xijun and Chi, Yiheng and Chan, Stanley H.},
  title     = {{SeeU}: Seeing the Unseen World via {4D} Dynamics-aware Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11150--11162},
}
REVISOR: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding: Jiaze Li,

Hao Yin,

Wenhui Tan,

Jingyang Chen,

Boshen Xu,

Yuxun Qu,

Yijing Chen,

Jianzhong Ju,

Zhenbo Luo,

Jian Luan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiaze and Yin, Hao and Tan, Wenhui and Chen, Jingyang and Xu, Boshen and Qu, Yuxun and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian},
  title     = {{REVISOR}: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5059--5069},
}
Hierarchical Concept Embedding & Pursuit for Interpretable Image Classification: Nghia Nguyen,

Tianjiao Ding,

René Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Nghia and Ding, Tianjiao and Vidal, Ren{\'e}},
  title     = {Hierarchical Concept Embedding \& Pursuit for Interpretable Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2907--2917},
}
Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models: Ivan Luiz De Moura Matos,

Abdel Djalil Sad Saoud,

Ekaterina Iakovleva,

Vito Paolo Pastore,

Enzo Tartaglione; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{De_Moura_Matos_2026_CVPR,
  author    = {De Moura Matos, Ivan Luiz and Saoud, Abdel Djalil Sad and Iakovleva, Ekaterina and Pastore, Vito Paolo and Tartaglione, Enzo},
  title     = {Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3294--3305},
}
ConceptPrism: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization: Minseo Kim,

Minchan Kwon,

Dongyeun Lee,

Yunho Jeon,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minseo and Kwon, Minchan and Lee, Dongyeun and Jeon, Yunho and Kim, Junmo},
  title     = {{ConceptPrism}: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2381--2390},
}
Video2Robo: 3DGS-based Synthetic Data from One Video Enables Scalable Robot Learning: Yinan Deng,

Kejia Hu,

Ye Chen,

Jianyu Dou,

Jiahui Wang,

Jingyu Zhao,

Haojia Ao,

Yi Yang,

Yufeng Yue; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Yinan and Hu, Kejia and Chen, Ye and Dou, Jianyu and Wang, Jiahui and Zhao, Jingyu and Ao, Haojia and Yang, Yi and Yue, Yufeng},
  title     = {{Video2Robo}: {3DGS}-based Synthetic Data from One Video Enables Scalable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6695--6705},
}
Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering Segment Anything Model: Xueyu Liu,

Xiaoyi Zhang,

Meilin Liu,

Guangze Shi,

Jia Shen,

Yujie Wang,

Cai Zhao,

Ziyuan He,

Yongfei Wu,

Mingqiang Wei,

Yongle Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xueyu and Zhang, Xiaoyi and Liu, Meilin and Shi, Guangze and Shen, Jia and Wang, Yujie and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang and Chen, Yongle},
  title     = {Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering {Segment Anything Model}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6591--6600},
}
Visual Prototype Conditioned Focal Region Generation for UAV-Based Object Detection: Wenhao Li,

Zimeng Wu,

Yu Wu,

Zehua Fu,

Jiaxin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Wu, Zimeng and Wu, Yu and Fu, Zehua and Chen, Jiaxin},
  title     = {Visual Prototype Conditioned Focal Region Generation for {UAV}-Based Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3772--3782},
}
Multi-Scale Local Speculative Decoding for Image Generation: Elia Peruzzo,

Guillaume Sautière,

Amirhossein Habibian; [pdf] [supp]
[bibtex]
@InProceedings{Peruzzo_2026_CVPR,
  author    = {Peruzzo, Elia and Sauti{\`e}re, Guillaume and Habibian, Amirhossein},
  title     = {Multi-Scale Local Speculative Decoding for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5253--5262},
}
Probabilistic Discrepancy Learning for Roadside LiDAR Scene Completion: Xiaogang Wu,

Jinchao Hu,

Zixian Wang,

Dun Liu,

BoXiang Cheng,

Yiqiang Wu; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaogang and Hu, Jinchao and Wang, Zixian and Liu, Dun and Cheng, BoXiang and Wu, Yiqiang},
  title     = {Probabilistic Discrepancy Learning for Roadside {LiDAR} Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9955--9964},
}
Hilbert Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network: Linkang Xu,

Gang Li,

Yue Song,

Xiangxin Ji; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Linkang and Li, Gang and Song, Yue and Ji, Xiangxin},
  title     = {{Hilbert} Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13113--13122},
}
Language-driven Fine-grained Retrieval: Shijie Wang,

Xin Yu,

Yadan Luo,

Zijian Wang,

Pengfei Zhang,

Zi Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shijie and Yu, Xin and Luo, Yadan and Wang, Zijian and Zhang, Pengfei and Huang, Zi},
  title     = {Language-driven Fine-grained Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2682--2692},
}
X-AVDT: Audio-Visual Cross-Attention for Robust Deepfake Detection: Youngseo Kim,

Kwan Yun,

Seokhyeon Hong,

Sihun Cha,

Colette Suhjung Koo,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Youngseo and Yun, Kwan and Hong, Seokhyeon and Cha, Sihun and Koo, Colette Suhjung and Noh, Junyong},
  title     = {{X-AVDT}: Audio-Visual Cross-Attention for Robust Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4403--4414},
}
SURF: Signature-Retained Fast Video Generation: Kaixin Ding,

Xi Chen,

Sihui Ji,

Yuan Gao,

Liang Hou,

Xin Tao,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Kaixin and Chen, Xi and Ji, Sihui and Gao, Yuan and Hou, Liang and Tao, Xin and Zhao, Hengshuang},
  title     = {{SURF}: Signature-Retained Fast Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9171--9181},
}
Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding: Mu Nan,

Muquan Yu,

Weijian Mai,

Jacob S. Prince,

Hossein Adeli,

Rui Zhang,

Jiahang Cao,

Benjamin Becker,

John A. Pyles,

Margaret M. Henderson,

Chunfeng Song,

Nikolaus Kriegeskorte,

Michael J. Tarr,

Xiaoqing Hu,

Andrew F. Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nan_2026_CVPR,
  author    = {Nan, Mu and Yu, Muquan and Mai, Weijian and Prince, Jacob S. and Adeli, Hossein and Zhang, Rui and Cao, Jiahang and Becker, Benjamin and Pyles, John A. and Henderson, Margaret M. and Song, Chunfeng and Kriegeskorte, Nikolaus and Tarr, Michael J. and Hu, Xiaoqing and Luo, Andrew F.},
  title     = {Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3604--3616},
}
fMRI-LM: Towards a Universal Foundation Model for Language-Aligned fMRI Understanding: Yuxiang Wei,

Yanteng Zhang,

Xi Xiao,

Chengxuan Qian,

Tianyang Wang,

Vince D. Calhoun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yuxiang and Zhang, Yanteng and Xiao, Xi and Qian, Chengxuan and Wang, Tianyang and Calhoun, Vince D.},
  title     = {{fMRI-LM}: Towards a Universal Foundation Model for Language-Aligned {fMRI} Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6931--6940},
}
CLEX: Complementary Label Exchange Learning for Noisy Facial Expression Recognition: Lin Wang,

Fang Liu,

Xiaofen Xing,

Kailing Guo,

Xiangmin Xu; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lin and Liu, Fang and Xing, Xiaofen and Guo, Kailing and Xu, Xiangmin},
  title     = {{CLEX}: Complementary Label Exchange Learning for Noisy Facial Expression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10577--10586},
}
OneHOI: Unifying Human-Object Interaction Generation and Editing: Jiun Tian Hoe,

Weipeng Hu,

Xudong Jiang,

Yap-Peng Tan,

Chee Seng Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hoe_2026_CVPR,
  author    = {Hoe, Jiun Tian and Hu, Weipeng and Jiang, Xudong and Tan, Yap-Peng and Chan, Chee Seng},
  title     = {{OneHOI}: Unifying Human-Object Interaction Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7664--7673},
}
Rel-Zero: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against AI Editing: Pengzhen Chen,

Yanwei Liu,

Xiaoyan Gu,

Xiaojun Chen,

Wu Liu,

Weiping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Chen, Xiaojun and Liu, Wu and Wang, Weiping},
  title     = {{Rel-Zero}: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against {AI} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3337--3346},
}
Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding: Zijun He,

Ping Wang,

Xiaodong Wang,

Chang Chen,

Xin Yuan; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Zijun and Wang, Ping and Wang, Xiaodong and Chen, Chang and Yuan, Xin},
  title     = {Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6910--6919},
}
NOWA: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization: Edwin Vargas,

Jhon Lopez,

Henry Arguello,

Ashok Veeraraghavan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vargas_2026_CVPR,
  author    = {Vargas, Edwin and Lopez, Jhon and Arguello, Henry and Veeraraghavan, Ashok},
  title     = {{NOWA}: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {102--112},
}
Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation: Haoyu Ji,

Bowen Chen,

Zhihao Yang,

Wenze Huang,

Yu Gao,

Xueting Liu,

Weihong Ren,

Zhiyong Wang,

Honghai Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Haoyu and Chen, Bowen and Yang, Zhihao and Huang, Wenze and Gao, Yu and Liu, Xueting and Ren, Weihong and Wang, Zhiyong and Liu, Honghai},
  title     = {Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12849--12859},
}
SemiGDA: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation: Kaiwen Huang,

Yi Zhou,

Yizhe Zhang,

Jingxiong Li,

Tao Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Kaiwen and Zhou, Yi and Zhang, Yizhe and Li, Jingxiong and Zhou, Tao},
  title     = {{SemiGDA}: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1450--1460},
}
DuetSVG: Unified Multimodal SVG Generation with Internal Visual Guidance: Peiying Zhang,

Nanxuan Zhao,

Matthew Fisher,

Yiran Xu,

Jing Liao,

Difan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peiying and Zhao, Nanxuan and Fisher, Matthew and Xu, Yiran and Liao, Jing and Liu, Difan},
  title     = {{DuetSVG}: Unified Multimodal {SVG} Generation with Internal Visual Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10219--10229},
}
FACE: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation: Hanxiao Wang,

Yuan-Chen Guo,

Ying-Tian Liu,

Zi-Xin Zou,

Biao Zhang,

Weize Quan,

Ding Liang,

Yan-Pei Cao,

Dong-Ming Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hanxiao and Guo, Yuan-Chen and Liu, Ying-Tian and Zou, Zi-Xin and Zhang, Biao and Quan, Weize and Liang, Ding and Cao, Yan-Pei and Yan, Dong-Ming},
  title     = {{FACE}: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12719--12729},
}
MatE: Material Extraction from Single-Image via Geometric Prior: Zeyu Zhang,

Wei Zhai,

Jian Yang,

Yang Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zeyu and Zhai, Wei and Yang, Jian and Cao, Yang},
  title     = {{MatE}: Material Extraction from Single-Image via Geometric Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12480--12490},
}
GenMatter: Perceiving Physical Objects with Generative Matter Models: Eric Li,

Arijit Dasgupta,

Yoni Friedman,

Mathieu Huot,

Vikash Mansinghka,

Thomas O'Connell,

William T. Freeman,

Joshua B. Tenenbaum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Eric and Dasgupta, Arijit and Friedman, Yoni and Huot, Mathieu and Mansinghka, Vikash and O'Connell, Thomas and Freeman, William T. and Tenenbaum, Joshua B.},
  title     = {{GenMatter}: Perceiving Physical Objects with Generative Matter Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3165--3175},
}
SPDMark: Selective Parameter Displacement for Robust Video Watermarking: Samar Fares,

Nurbek Tastan,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fares_2026_CVPR,
  author    = {Fares, Samar and Tastan, Nurbek and Nandakumar, Karthik},
  title     = {{SPDMark}: Selective Parameter Displacement for Robust Video Watermarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10303--10312},
}
Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning: Mingjie Ma,

Yichao Ma,

Zhong Yang,

Guohui Li; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Mingjie and Ma, Yichao and Yang, Zhong and Li, Guohui},
  title     = {Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8727--8736},
}
MedMO: Grounding and Understanding Multimodal Large Language Model for Medical Images: Ankan Deria,

Komal Kumar,

Adinath Madhavrao Dukre,

Eran Segal,

Salman Khan,

Imran Razzak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deria_2026_CVPR,
  author    = {Deria, Ankan and Kumar, Komal and Dukre, Adinath Madhavrao and Segal, Eran and Khan, Salman and Razzak, Imran},
  title     = {{MedMO}: Grounding and Understanding Multimodal Large Language Model for Medical Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5092--5103},
}
P2GS: Physical Prior-guided Gaussian Splatting for Photometrically Consistent Urban Reconstruction: Kota Shimomura,

Hidehisa Arai,

Tsubasa Takahashi,

Takayoshi Yamashita,

Hironobu Fujiyoshi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shimomura_2026_CVPR,
  author    = {Shimomura, Kota and Arai, Hidehisa and Takahashi, Tsubasa and Yamashita, Takayoshi and Fujiyoshi, Hironobu},
  title     = {{P2GS}: Physical Prior-guided {Gaussian} Splatting for Photometrically Consistent Urban Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11736--11745},
}
UNI-OOD: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling: Yuchuan Li,

Azadeh Motamedi,

Hyock Ju Kwon,

Chul B Park,

Il-Min Kim; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuchuan and Motamedi, Azadeh and Kwon, Hyock Ju and Park, Chul B and Kim, Il-Min},
  title     = {{UNI-OOD}: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6282--6292},
}
Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval: Zhiheng Fu,

Yupeng Hu,

Qianyun Yang,

Shiqi Zhang,

Zhiwei Chen,

Zixu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Zhiheng and Hu, Yupeng and Yang, Qianyun and Zhang, Shiqi and Chen, Zhiwei and Li, Zixu},
  title     = {{Air-Know}: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2658--2670},
}
MeshSplatting: Differentiable Rendering with Opaque Meshes: Jan Held,

Sanghyun Son,

Renaud Vandeghen,

Daniel Rebain,

Matheus Gadelha,

Yi Zhou,

Anthony Cioppa,

Ming C. Lin,

Marc Van Droogenbroeck,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Held_2026_CVPR,
  author    = {Held, Jan and Son, Sanghyun and Vandeghen, Renaud and Rebain, Daniel and Gadelha, Matheus and Zhou, Yi and Cioppa, Anthony and Lin, Ming C. and Van Droogenbroeck, Marc and Tagliasacchi, Andrea},
  title     = {{MeshSplatting}: Differentiable Rendering with Opaque Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7320--7329},
}
CUPID: Generative 3D Reconstruction via Joint Object and Pose Modeling: Binbin Huang,

Haobin Duan,

Yiqun Zhao,

Zibo Zhao,

Yi Ma,

Shenghua Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Binbin and Duan, Haobin and Zhao, Yiqun and Zhao, Zibo and Ma, Yi and Gao, Shenghua},
  title     = {{CUPID}: Generative {3D} Reconstruction via Joint Object and Pose Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12741--12752},
}
Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning: Yongxin Yan,

Weisen Chen,

Xingye Chen,

Yuanjie Shao,

Zhengrong Zuo,

Wenming Tan,

Wenqi Ren,

Changxin Gao,

Nong Sang; [pdf]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yongxin and Chen, Weisen and Chen, Xingye and Shao, Yuanjie and Zuo, Zhengrong and Tan, Wenming and Ren, Wenqi and Gao, Changxin and Sang, Nong},
  title     = {Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5511--5520},
}
FM-Steer: Enhance Generalist Policies with Value-Guided Cascaded Denoising: Haoming Song,

Delin Qu,

Yuanqi Yao,

Qizhi Chen,

Jiarui Li,

Qi Lv,

Yiwen Tang,

Li Kang,

Heng Zhou,

Xianqiang Gao,

Yuhang Tang,

Xiaofan Li,

Modi Shi,

Guanghui Ren,

Maoqing Yao,

Bin Zhao,

Dong Wang,

Xuelong Li; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Haoming and Qu, Delin and Yao, Yuanqi and Chen, Qizhi and Li, Jiarui and Lv, Qi and Tang, Yiwen and Kang, Li and Zhou, Heng and Gao, Xianqiang and Tang, Yuhang and Li, Xiaofan and Shi, Modi and Ren, Guanghui and Yao, Maoqing and Zhao, Bin and Wang, Dong and Li, Xuelong},
  title     = {{FM-Steer}: Enhance Generalist Policies with Value-Guided Cascaded Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13407--13418},
}
LLM-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis: Ibne Farabi Shihab,

Sanjeda Akter,

Anuj Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shihab_2026_CVPR,
  author    = {Shihab, Ibne Farabi and Akter, Sanjeda and Sharma, Anuj},
  title     = {{LLM}-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3574--3583},
}
COPO: Causal-Oriented Policy Optimization for Hallucinations of MLLMs: Peizheng Guo,

Jingyao Wang,

Wenwen Qiang,

Jiahuan Zhou,

Changwen Zheng,

Gang Hua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Peizheng and Wang, Jingyao and Qiang, Wenwen and Zhou, Jiahuan and Zheng, Changwen and Hua, Gang},
  title     = {{COPO}: Causal-Oriented Policy Optimization for Hallucinations of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11051--11063},
}
AutoTraces: Autoregressive Trajectory Forecasting via Multimodal Large Language Models: Teng Wang,

Yanting Lu,

Ruize Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Teng and Lu, Yanting and Wang, Ruize},
  title     = {{AutoTraces}: Autoregressive Trajectory Forecasting via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4054--4064},
}
Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo: Ninghui Xu,

Fabio Tosi,

Lihui Wang,

Jiawei Han,

Luca Bartolomei,

Zhiting Yao,

Matteo Poggi,

Stefano Mattoccia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Ninghui and Tosi, Fabio and Wang, Lihui and Han, Jiawei and Bartolomei, Luca and Yao, Zhiting and Poggi, Matteo and Mattoccia, Stefano},
  title     = {Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {914--925},
}
Disco-GS: Gaussian Splatting in Dynamic Color Lighting: Ashish Kumar,

A. N. Rajagopalan; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashish and Rajagopalan, A. N.},
  title     = {{Disco-GS}: {Gaussian} Splatting in Dynamic Color Lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11891--11900},
}
PureCC: Pure Learning for Text-to-Image Concept Customization: Zhichao Liao,

Xiaole Xian,

Qingyu Li,

Wenyu Qin,

Meng Wang,

Weicheng Xie,

Siyang Song,

Pingfa Feng,

Long Zeng,

Liang Pan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Zhichao and Xian, Xiaole and Li, Qingyu and Qin, Wenyu and Wang, Meng and Xie, Weicheng and Song, Siyang and Feng, Pingfa and Zeng, Long and Pan, Liang},
  title     = {{PureCC}: Pure Learning for Text-to-Image Concept Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7730--7740},
}
Scaling Dense Event-Stream Pretraining from Visual Foundation Models: Zhiwen Chen,

Junhui Hou,

Zhiyu Zhu,

Jinjian Wu,

Guangming Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhiwen and Hou, Junhui and Zhu, Zhiyu and Wu, Jinjian and Shi, Guangming},
  title     = {Scaling Dense Event-Stream Pretraining from Visual Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8011--8022},
}
CapNav: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation: Xia Su,

Ruiqi Chen,

Benlin Liu,

Jingwei Ma,

Zonglin Di,

Ranjay Krishna,

Jon Froehlich; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Xia and Chen, Ruiqi and Liu, Benlin and Ma, Jingwei and Di, Zonglin and Krishna, Ranjay and Froehlich, Jon},
  title     = {{CapNav}: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4043--4053},
}
Back to Point: Exploring Point-Language Models for Zero-Shot 3D Anomaly Detection: Kaiqiang Li,

Gang Li,

Mingle Zhou,

Min Li,

Delong Han,

Jin Wan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Kaiqiang and Li, Gang and Zhou, Mingle and Li, Min and Han, Delong and Wan, Jin},
  title     = {Back to Point: Exploring Point-Language Models for Zero-Shot {3D} Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14167--14177},
}
Transition Matching Distillation for Fast Video Generation: Weili Nie,

Julius Berner,

Nanye Ma,

Chao Liu,

Saining Xie,

Arash Vahdat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Weili and Berner, Julius and Ma, Nanye and Liu, Chao and Xie, Saining and Vahdat, Arash},
  title     = {Transition Matching Distillation for Fast Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4645--4655},
}
MoCapAnything: Unified 3D Motion Capture for Arbitrary Skeletons from Monocular Videos: Kehong Gong,

Zhengyu Wen,

Weixia He,

Mingxi Xu,

Qi Wang,

Ning Zhang,

Zhengyu Li,

Dongze Lian,

Wei Zhao,

Xiaoyu He,

Mingyuan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Kehong and Wen, Zhengyu and He, Weixia and Xu, Mingxi and Wang, Qi and Zhang, Ning and Li, Zhengyu and Lian, Dongze and Zhao, Wei and He, Xiaoyu and Zhang, Mingyuan},
  title     = {{MoCapAnything}: Unified {3D} Motion Capture for Arbitrary Skeletons from Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7089--7099},
}
Bootstrapping Multi-view Learning for Test-time Noisy Correspondence: Changhao He,

Di Xue,

Shuxian Li,

Yanji Hao,

Xi Peng,

Peng Hu; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Changhao and Xue, Di and Li, Shuxian and Hao, Yanji and Peng, Xi and Hu, Peng},
  title     = {Bootstrapping Multi-view Learning for Test-time Noisy Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1627--1638},
}
Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models: Yuehao Liu,

Shanyan Guan,

Weijia Zhang,

Xuanming Shang,

Yanhao Ge,

Wei Li,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuehao and Guan, Shanyan and Zhang, Weijia and Shang, Xuanming and Ge, Yanhao and Li, Wei and Ma, Chao},
  title     = {Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3898--3907},
}
CLiViS: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning: Kailing Li,

Qi'ao Xu,

Tianwen Qian,

Yuqian Fu,

Yang Jiao,

Xiaoling Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Kailing and Xu, Qi'ao and Qian, Tianwen and Fu, Yuqian and Jiao, Yang and Wang, Xiaoling},
  title     = {CLiViS: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5134--5143},
}
TEAR: Temporal-aware Automated Red-teaming for Text-to-Video Models: Jiaming He,

Guanyu Hou,

Hongwei Li,

Zhicong Huang,

Kangjie Chen,

Yi Yu,

Wenbo Jiang,

Guowen Xu,

Tianwei Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Jiaming and Hou, Guanyu and Li, Hongwei and Huang, Zhicong and Chen, Kangjie and Yu, Yi and Jiang, Wenbo and Xu, Guowen and Zhang, Tianwei},
  title     = {TEAR: Temporal-aware Automated Red-teaming for Text-to-Video Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41--50},
}
GroundingME: Exposing the Visual Grounding Gap in MLLMs through Multi-Dimensional Evaluation: Rang Li,

Lei Li,

Shuhuai Ren,

Hao Tian,

Shuhao Gu,

Shicheng Li,

Zihao Yue,

Yudong Wang,

Wenhan Ma,

Zhe Yang,

Jingyuan Ma,

Zhifang Sui,

Fuli Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Rang and Li, Lei and Ren, Shuhuai and Tian, Hao and Gu, Shuhao and Li, Shicheng and Yue, Zihao and Wang, Yudong and Ma, Wenhan and Yang, Zhe and Ma, Jingyuan and Sui, Zhifang and Luo, Fuli},
  title     = {GroundingME: Exposing the Visual Grounding Gap in MLLMs through Multi-Dimensional Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2412--2422},
}
Aesthetic Camera Viewpoint Suggestion with 3D Aesthetic Field: Sheyang Tang,

Armin Shafiee Sarvestani,

Jialu Xu,

Xiaoyu Xu,

Zhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Sheyang and Sarvestani, Armin Shafiee and Xu, Jialu and Xu, Xiaoyu and Wang, Zhou},
  title     = {Aesthetic Camera Viewpoint Suggestion with 3D Aesthetic Field},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8278--8287},
}
FloodDiffusion: Tailored Diffusion Forcing for Streaming Motion Generation: Yiyi Cai,

Yuhan Wu,

Kunhang Li,

You Zhou,

Bo Zheng,

Haiyang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Yiyi and Wu, Yuhan and Li, Kunhang and Zhou, You and Zheng, Bo and Liu, Haiyang},
  title     = {FloodDiffusion: Tailored Diffusion Forcing for Streaming Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2295--2304},
}
MeshRipple: Structured Autoregressive Generation of Artist-Meshes: Junkai Lin,

Hang Long,

Huipeng Guo,

Jielei Zhang,

Jiayi Yang,

Tianle Guo,

Yang Yang,

Jianwen Li,

Wenxiao Zhang,

Matthias Nießner,

Wei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Junkai and Long, Hang and Guo, Huipeng and Zhang, Jielei and Yang, Jiayi and Guo, Tianle and Yang, Yang and Li, Jianwen and Zhang, Wenxiao and Nie{\ss}ner, Matthias and Yang, Wei},
  title     = {MeshRipple: Structured Autoregressive Generation of Artist-Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12706--12718},
}
BiGMINT: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery: Pushpak Pati,

Bo Li,

Abbas Rayabat Khan,

Tomé Albuquerque,

Steffen Jaensch,

Amina Mollaysa,

Walid M. Abdelmoula,

Samantha J. Allen,

Joke Reumers,

Helai P. Mohammad,

Scott Oloff,

Tommaso Mansi,

Rui Liao,

Dmytro S. Lituiev,

Zhoubing Xu; [pdf] [supp]
[bibtex]
@InProceedings{Pati_2026_CVPR,
  author    = {Pati, Pushpak and Li, Bo and Khan, Abbas Rayabat and Albuquerque, Tom{\'e} and Jaensch, Steffen and Mollaysa, Amina and Abdelmoula, Walid M. and Allen, Samantha J. and Reumers, Joke and Mohammad, Helai P. and Oloff, Scott and Mansi, Tommaso and Liao, Rui and Lituiev, Dmytro S. and Xu, Zhoubing},
  title     = {BiGMINT: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6982--6993},
}
Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration: Shihao Shan,

Hongying Liu,

Fanhua Shang,

Liang Wan,

Jingjing Deng; [pdf]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wan, Liang and Deng, Jingjing},
  title     = {Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13702--13711},
}
Text-Image Conditioned 3D Generation: Jiazhong Cen,

Jiemin Fang,

Sikuang Li,

Guanjun Wu,

Chen Yang,

Taoran Yi,

Zanwei Zhou,

Zhikuan Bao,

Lingxi Xie,

Wei Shen,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cen_2026_CVPR,
  author    = {Cen, Jiazhong and Fang, Jiemin and Li, Sikuang and Wu, Guanjun and Yang, Chen and Yi, Taoran and Zhou, Zanwei and Bao, Zhikuan and Xie, Lingxi and Shen, Wei and Tian, Qi},
  title     = {Text-Image Conditioned 3D Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {604--614},
}
SV-GS: Sparse View 4D Reconstruction with Skeleton-Driven Gaussian Splatting: Jun-Jee Chao,

Volkan Isler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chao_2026_CVPR,
  author    = {Chao, Jun-Jee and Isler, Volkan},
  title     = {SV-GS: Sparse View 4D Reconstruction with Skeleton-Driven Gaussian Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5027--5037},
}
Think 360°: Beyond Depth: Evaluating the Width-centric Reasoning Capability of MLLMs: Mingrui Chen,

Hexiong Yang,

Haogeng Liu,

Huaibo Huang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Mingrui and Yang, Hexiong and Liu, Haogeng and Huang, Huaibo and He, Ran},
  title     = {Think {360$^\circ$}: Beyond Depth: Evaluating the Width-centric Reasoning Capability of MLLMs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5211--5220},
}
ORBIT: Benchmarking SfM in the Wild with 360deg Video: Sara Sabour,

Richard Tucker,

Marcus Brubaker,

Saurabh Saxena,

Junhwa Hur,

Andrea Tagliasacchi,

Deqing Sun,

David J. Fleet,

Richard Szeliski,

Noah Snavely; [pdf] [supp]
[bibtex]
@InProceedings{Sabour_2026_CVPR,
  author    = {Sabour, Sara and Tucker, Richard and Brubaker, Marcus and Saxena, Saurabh and Hur, Junhwa and Tagliasacchi, Andrea and Sun, Deqing and Fleet, David J. and Szeliski, Richard and Snavely, Noah},
  title     = {ORBIT: Benchmarking SfM in the Wild with {360$^\circ$} Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6791--6801},
}
VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition: Tanush Yadav,

Mohammadreza Salehi,

Jae Sung Park,

Vivek Ramanujan,

Hannaneh Hajishirzi,

Yejin Choi,

Ali Farhadi,

Rohun Tripathi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Tanush and Salehi, Mohammadreza and Park, Jae Sung and Ramanujan, Vivek and Hajishirzi, Hannaneh and Choi, Yejin and Farhadi, Ali and Tripathi, Rohun and Krishna, Ranjay},
  title     = {VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12881--12891},
}
Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models: Ruiying Peng,

Xueyu Wu,

Jing Lei,

Lu Hou,

Yuanzheng Ma,

Xiao-Hui Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ruiying and Wu, Xueyu and Lei, Jing and Hou, Lu and Ma, Yuanzheng and Li, Xiao-Hui},
  title     = {Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12064--12073},
}
Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding: Weikai Huang,

Jieyu Zhang,

Taoyang Jia,

Chenhao Zheng,

Ziqi Gao,

Jae Sung Park,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Weikai and Zhang, Jieyu and Jia, Taoyang and Zheng, Chenhao and Gao, Ziqi and Park, Jae Sung and Krishna, Ranjay},
  title     = {Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6388--6398},
}
Towards Generalized Multimodal Homography Estimation: Jinkun You,

Jiaxin Cheng,

Jie Zhang,

Yicong Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Jinkun and Cheng, Jiaxin and Zhang, Jie and Zhou, Yicong},
  title     = {Towards Generalized Multimodal Homography Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8408--8417},
}
SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition: Rui Fan,

Weidong Hao,

Juntao Guan,

Lai Rui,

Tong Wu,

Fanhong Zeng,

Lin Gu; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Rui and Hao, Weidong and Guan, Juntao and Rui, Lai and Wu, Tong and Zeng, Fanhong and Gu, Lin},
  title     = {SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6043--6053},
}
Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation: Yangshi Ge,

Zheng Liu,

Feng Lu; [pdf]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Yangshi and Liu, Zheng and Lu, Feng},
  title     = {Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3101--3110},
}
SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation: Zhanfeng Liao,

Jiajun Zhang,

Hanzhang Tu,

Zhixi Wang,

Yunqi Gao,

Hongwen Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Zhanfeng and Zhang, Jiajun and Tu, Hanzhang and Wang, Zhixi and Gao, Yunqi and Zhang, Hongwen and Liu, Yebin},
  title     = {SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11798--11807},
}
A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs: Nicolas Stalder,

Benjamin F. Grewe,

Matteo Saponati,

Pau Vilimelis Aceituno; [pdf] [supp]
[bibtex]
@InProceedings{Stalder_2026_CVPR,
  author    = {Stalder, Nicolas and Grewe, Benjamin F. and Saponati, Matteo and Aceituno, Pau Vilimelis},
  title     = {A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6559--6568},
}
Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction: Chengxin Lv,

Yihui Li,

Hongyu Yang,

YunHong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Chengxin and Li, Yihui and Yang, Hongyu and Wang, YunHong},
  title     = {Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14198--14207},
}
Splatent: Splatting Diffusion Latents for Novel View Synthesis: Or Hirschorn,

Omer Sela,

Inbar Huberman-Spiegelglas,

Netalee Efrat,

Eli Alshan,

Ianir Ideses,

Frederic Devernay,

Yochai Zvik,

Lior Fritz; [pdf] [supp]
[bibtex]
@InProceedings{Hirschorn_2026_CVPR,
  author    = {Hirschorn, Or and Sela, Omer and Huberman-Spiegelglas, Inbar and Efrat, Netalee and Alshan, Eli and Ideses, Ianir and Devernay, Frederic and Zvik, Yochai and Fritz, Lior},
  title     = {Splatent: Splatting Diffusion Latents for Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8319--8330},
}
Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning: Shashanka Venkataramanan,

Valentinos Pariza,

Mohammadreza Salehi,

Lukas Knobel,

Elias Ramzi,

Spyros Gidaris,

Andrei Bursuc,

Yuki M Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Venkataramanan_2026_CVPR,
  author    = {Venkataramanan, Shashanka and Pariza, Valentinos and Salehi, Mohammadreza and Knobel, Lukas and Ramzi, Elias and Gidaris, Spyros and Bursuc, Andrei and Asano, Yuki M},
  title     = {Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10533--10544},
}
Benchmarking Single-Factor Physical Video-to-Audio Generation: Tingle Li,

Siddharth Gururani,

Kevin J. Shih,

Gantavya Bhatt,

Sang-gil Lee,

Zhifeng Kong,

Arushi Goel,

Gopala Anumanchipalli,

Ming-Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tingle and Gururani, Siddharth and Shih, Kevin J. and Bhatt, Gantavya and Lee, Sang-gil and Kong, Zhifeng and Goel, Arushi and Anumanchipalli, Gopala and Liu, Ming-Yu},
  title     = {Benchmarking Single-Factor Physical Video-to-Audio Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1939--1949},
}
UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass: Mengfei Li,

Peng Li,

Zheng Zhang,

Jiahao Lu,

Chengfeng Zhao,

Wei Xue,

Qifeng Liu,

Sida Peng,

Wenxiao Zhang,

Wenhan Luo,

Yuan Liu,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengfei and Li, Peng and Zhang, Zheng and Lu, Jiahao and Zhao, Chengfeng and Xue, Wei and Liu, Qifeng and Peng, Sida and Zhang, Wenxiao and Luo, Wenhan and Liu, Yuan and Guo, Yike},
  title     = {UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14038--14049},
}
SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models: Senyu Fei,

Siyin Wang,

Li Ji,

Ao Li,

Shiduo Zhang,

Liming Liu,

Jinlong Hou,

Jingjing Gong,

Xianzhong Zhao,

Xipeng Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fei_2026_CVPR,
  author    = {Fei, Senyu and Wang, Siyin and Ji, Li and Li, Ao and Zhang, Shiduo and Liu, Liming and Hou, Jinlong and Gong, Jingjing and Zhao, Xianzhong and Qiu, Xipeng},
  title     = {SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6718--6728},
}
Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification: Qihao Liu,

Chengzhi Mao,

Yaojie Liu,

Alan Yuille,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Qihao and Mao, Chengzhi and Liu, Yaojie and Yuille, Alan and Chu, Wen-Sheng},
  title     = {Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1639--1650},
}
FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models: Jiarui Wang,

Huiyu Duan,

Juntong Wang,

Xiongkuo Min; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiarui and Duan, Huiyu and Wang, Juntong and Min, Xiongkuo},
  title     = {FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4425--4437},
}
MTA: Multimodal Task Alignment for BEV Perception and Captioning: Yunsheng Ma,

Burhaneddin Yaman,

Xin Ye,

Jingru Luo,

Feng Tao,

Abhirup Mallik,

Ziran Wang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yunsheng and Yaman, Burhaneddin and Ye, Xin and Luo, Jingru and Tao, Feng and Mallik, Abhirup and Wang, Ziran and Ren, Liu},
  title     = {MTA: Multimodal Task Alignment for BEV Perception and Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {670--679},
}
GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation: Ken Deng,

Yunhan Yang,

Jingxiang Sun,

Xihui Liu,

Yebin Liu,

Ding Liang,

Yan-Pei Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Ken and Yang, Yunhan and Sun, Jingxiang and Liu, Xihui and Liu, Yebin and Liang, Ding and Cao, Yan-Pei},
  title     = {GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6367--6376},
}
Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution: Hee Min Choi,

Hyoa Kang,

Suji Kim,

Dokwan Oh,

Nam Ik Cho; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Hee Min and Kang, Hyoa and Kim, Suji and Oh, Dokwan and Cho, Nam Ik},
  title     = {Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6812--6822},
}
Unsafe2Safe: Controllable Image Anonymization for Downstream Utility: Minh Dinh,

SouYoung Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2026_CVPR,
  author    = {Dinh, Minh and Jin, SouYoung},
  title     = {Unsafe2Safe: Controllable Image Anonymization for Downstream Utility},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3326--3336},
}
Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition: Shengkai Sun,

Zhiyong Cheng,

Zefan Zhang,

Jianfeng Dong,

Zhihui Li,

Meng Wang; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Shengkai and Cheng, Zhiyong and Zhang, Zefan and Dong, Jianfeng and Li, Zhihui and Wang, Meng},
  title     = {Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13974--13983},
}
FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices: Ruonan Zhao,

Zheng Wang,

Debin Liu,

Shijie Lv,

Laurence Tianruo Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruonan and Wang, Zheng and Liu, Debin and Lv, Shijie and Yang, Laurence Tianruo},
  title     = {FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10357--10366},
}
MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts: Zheng Zhang,

Qinchuan Zhang,

Yuteng Ye,

Zhi Chen,

Penglei Ji,

Mengfei Li,

Wenxiao Zhang,

Yuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zheng and Zhang, Qinchuan and Ye, Yuteng and Chen, Zhi and Ji, Penglei and Li, Mengfei and Zhang, Wenxiao and Liu, Yuan},
  title     = {MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12684--12694},
}
One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer: Shijun Shi,

Jing Xu,

Zhihang Li,

Chunli Peng,

Xiaoda Yang,

Lijing Lu,

Kai Hu,

Jiangning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Shijun and Xu, Jing and Li, Zhihang and Peng, Chunli and Yang, Xiaoda and Lu, Lijing and Hu, Kai and Zhang, Jiangning},
  title     = {One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4011--4021},
}
Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training: Peng Sun,

Jun Xie,

Tao Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Peng and Xie, Jun and Lin, Tao},
  title     = {Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2047--2057},
}
E$^2$-SCI: Elastic Edge-Cloud Speculative Decoding via Credit Inertia: Senyao Li,

Haozhao Wang,

Zhaobai Jiang,

Zhanbo Jin,

Hao Fan,

Ruixuan Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Senyao and Wang, Haozhao and Jiang, Zhaobai and Jin, Zhanbo and Fan, Hao and Li, Ruixuan},
  title     = {{E$^2$-SCI}: Elastic Edge-Cloud Speculative Decoding via Credit Inertia},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12944--12954},
}
MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision: Yunpeng Yin,

Lihan Wang,

Zhaoshen He,

Xinqiang He,

Xingming Liao,

Zhuowei Wang,

Lianglun Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Yunpeng and Wang, Lihan and He, Zhaoshen and He, Xinqiang and Liao, Xingming and Wang, Zhuowei and Cheng, Lianglun},
  title     = {MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6432--6442},
}
Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement: Fei Zhou,

Xiwen Zhang,

Qingqing Qiu,

Lei Zhang,

Wei Wei,

Chen Ding,

Yi Zhang,

Liang Li,

Xiangyu Yue,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Fei and Zhang, Xiwen and Qiu, Qingqing and Zhang, Lei and Wei, Wei and Ding, Chen and Zhang, Yi and Li, Liang and Yue, Xiangyu and Zhang, Yanning},
  title     = {Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7946--7956},
}
Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching: Jintu Zheng,

Qizhe Liu,

Huangxin Xu,

Zhuojie Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jintu and Liu, Qizhe and Xu, Huangxin and Chen, Zhuojie},
  title     = {Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7503--7512},
}
Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding: Yuchen Feng,

Zhenyu Zhang,

Naibin Gu,

Yilong Chen,

Peng Fu,

Zheng Lin,

Shuohuan Wang,

Yu Sun,

Hua Wu,

Weiping Wang,

Haifeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yuchen and Zhang, Zhenyu and Gu, Naibin and Chen, Yilong and Fu, Peng and Lin, Zheng and Wang, Shuohuan and Sun, Yu and Wu, Hua and Wang, Weiping and Wang, Haifeng},
  title     = {Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3511--3521},
}
PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting: Stephen Price,

Danielle L. Cote,

Elke A. Rundensteiner; [pdf] [supp]
[bibtex]
@InProceedings{Price_2026_CVPR,
  author    = {Price, Stephen and Cote, Danielle L. and Rundensteiner, Elke A.},
  title     = {PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6325--6335},
}
Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models: Hongji Li,

Manjiang Yu,

Junchi Yao,

Priyanka Singh,

Xue Li,

Di Wang,

Lijie Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongji and Yu, Manjiang and Yao, Junchi and Singh, Priyanka and Li, Xue and Wang, Di and Hu, Lijie},
  title     = {Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10251--10261},
}
SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time: Zhening Huang,

Hyeonho Jeong,

Xuelin Chen,

Yulia Gryaditskaya,

Tuanfeng Y. Wang,

Joan Lasenby,

Chun-Hao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhening and Jeong, Hyeonho and Chen, Xuelin and Gryaditskaya, Yulia and Wang, Tuanfeng Y. and Lasenby, Joan and Huang, Chun-Hao},
  title     = {SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11218--11228},
}
Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification: Yudi Xie,

Zhongao Zhou,

Bin Yang,

Zhenghan Chen,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yudi and Zhou, Zhongao and Yang, Bin and Chen, Zhenghan and Ye, Mang},
  title     = {Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11273--11282},
}
Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation: Qitong Yang,

Mingtao Feng,

Zijie Wu,

Huixin Zhu,

Weisheng Dong,

Yaonan Wang,

Ajmal Mian; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Zhu, Huixin and Dong, Weisheng and Wang, Yaonan and Mian, Ajmal},
  title     = {Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12695--12705},
}
From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal: Daniel George,

Charles Yeh,

Daniel Lee,

Yifei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{George_2026_CVPR,
  author    = {George, Daniel and Yeh, Charles and Lee, Daniel and Zhang, Yifei},
  title     = {From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3284--3293},
}
SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images: Zepeng Xin,

Kaiyu Li,

Luodi Chen,

Wanchen Li,

Xiao Yuchen,

Hui Qiao,

Weizhan Zhang,

Deyu Meng,

Xiangyong Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Zepeng and Li, Kaiyu and Chen, Luodi and Li, Wanchen and Yuchen, Xiao and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong},
  title     = {SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13199--13210},
}
AvatarPointillist: AutoRegressive 4D Gaussian Avatarization: Hongyu Liu,

Xuan Wang,

Zijian Wu,

Yating Wang,

Ziyu Wan,

Yue Ma,

Runtao Liu,

Boyao Zhou,

Yujun Shen,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hongyu and Wang, Xuan and Wu, Zijian and Wang, Yating and Wan, Ziyu and Ma, Yue and Liu, Runtao and Zhou, Boyao and Shen, Yujun and Chen, Qifeng},
  title     = {AvatarPointillist: AutoRegressive 4D Gaussian Avatarization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11039--11050},
}
Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation: Halima Bouzidi,

Haoyu Liu,

Yonatan Achamyeleh,

Praneetsai Iddamsetty,

Mohammad Al Faruque; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bouzidi_2026_CVPR,
  author    = {Bouzidi, Halima and Liu, Haoyu and Achamyeleh, Yonatan and Iddamsetty, Praneetsai and Al Faruque, Mohammad},
  title     = {Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13326--13335},
}
VLIC: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression: Kyle Sargent,

Ruiqi Gao,

Philipp Henzler,

Charles Herrmann,

Aleksander Holynski,

Li Fei-Fei,

Jiajun Wu,

Jason Y. Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargent_2026_CVPR,
  author    = {Sargent, Kyle and Gao, Ruiqi and Henzler, Philipp and Herrmann, Charles and Holynski, Aleksander and Fei-Fei, Li and Wu, Jiajun and Zhang, Jason Y.},
  title     = {{VLIC}: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10462--10471},
}
HiconAgent: History Context-aware Policy Optimization for GUI Agents: Xurui Zhou,

Gongwei Chen,

Yuquan Xie,

Zaijing Li,

Kaiwen Zhou,

Shuai Wang,

Shuo Yang,

Zhuotao Tian,

Rui Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xurui and Chen, Gongwei and Xie, Yuquan and Li, Zaijing and Zhou, Kaiwen and Wang, Shuai and Yang, Shuo and Tian, Zhuotao and Shao, Rui},
  title     = {{HiconAgent}: History Context-aware Policy Optimization for {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13028--13038},
}
CLaD: Planning with Grounded Foresight via Cross-Modal Latent Dynamics: Andrew Jeong,

Jaemin Kim,

Sebin Lee,

Sung-Eui Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Andrew and Kim, Jaemin and Lee, Sebin and Yoon, Sung-Eui},
  title     = {{CLaD}: Planning with Grounded Foresight via Cross-Modal Latent Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {966--975},
}
Unblur-SLAM: Dense Neural SLAM for Blurry Inputs: Qi Zhang,

Denis Rozumny,

Francesco Girlanda,

Sezer Karaoglu,

Marc Pollefeys,

Theo Gevers,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qi and Rozumny, Denis and Girlanda, Francesco and Karaoglu, Sezer and Pollefeys, Marc and Gevers, Theo and Oswald, Martin R.},
  title     = {{Unblur-SLAM}: Dense Neural {SLAM} for Blurry Inputs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {352--362},
}
GDPO-SR: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution: Qiaosi Yi,

Shuai Li,

Rongyuan Wu,

Lingchen Sun,

Zhengqiang Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Zhang, Lei},
  title     = {{GDPO-SR}: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2177--2187},
}
SceneScribe-1M: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations: Yunnan Wang,

Kecheng Zheng,

Jianyuan Wang,

Minghao Chen,

David Novotny,

Christian Rupprecht,

Yinghao Xu,

Xing Zhu,

Wenjun Zeng,

Xin Jin,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yunnan and Zheng, Kecheng and Wang, Jianyuan and Chen, Minghao and Novotny, David and Rupprecht, Christian and Xu, Yinghao and Zhu, Xing and Zeng, Wenjun and Jin, Xin and Shen, Yujun},
  title     = {{SceneScribe-1M}: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12628--12639},
}
Direction-aware 3D Large Multimodal Models: Quan Liu,

Weihao Xuan,

Junjue Wang,

Naoto Yokoya,

Ling Shao,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Quan and Xuan, Weihao and Wang, Junjue and Yokoya, Naoto and Shao, Ling and Lu, Shijian},
  title     = {Direction-aware {3D} Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9668--9678},
}
Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding: Zhongxing Xu,

Zhonghua Wang,

Zhe Qian,

Dachuan Shi,

Feilong Tang,

Ming Hu,

Shiyan Su,

Xiaocheng Zou,

Wei Feng,

Dwarikanath Mahapatra,

Yifan Peng,

Minquan Lin,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhongxing and Wang, Zhonghua and Qian, Zhe and Shi, Dachuan and Tang, Feilong and Hu, Ming and Su, Shiyan and Zou, Xiaocheng and Feng, Wei and Mahapatra, Dwarikanath and Peng, Yifan and Lin, Minquan and Ge, Zongyuan},
  title     = {Thinking in Uncertainty: Mitigating Hallucinations in {MLRMs} with Latent Entropy-Aware Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11064--11075},
}
Hierarchical Action Learning for Weakly-Supervised Action Segmentation: Junxian Huang,

Ruichu Cai,

Juntao Fang,

Hao Zhu,

Boyan Xu,

Weilin Chen,

Zijian Li,

Shenghua Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Junxian and Cai, Ruichu and Fang, Juntao and Zhu, Hao and Xu, Boyan and Chen, Weilin and Li, Zijian and Gao, Shenghua},
  title     = {Hierarchical Action Learning for Weakly-Supervised Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6054--6064},
}
MuCo: Multi-turn Contrastive Learning for Multimodal Embedding Model: Geonmo Gu,

Byeongho Heo,

Jaemyung Yu,

Jaehui Hwang,

Taekyung Kim,

Sangmin Lee,

HeeJae Jun,

Yoohoon Kang,

Sangdoo Yun,

Dongyoon Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Geonmo and Heo, Byeongho and Yu, Jaemyung and Hwang, Jaehui and Kim, Taekyung and Lee, Sangmin and Jun, HeeJae and Kang, Yoohoon and Yun, Sangdoo and Han, Dongyoon},
  title     = {{MuCo}: Multi-turn Contrastive Learning for Multimodal Embedding Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1749--1758},
}
Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes: Qi Zhang,

Jixuan Chen,

Kaiyi Zhang,

Xinquan Yu,

Antoni B. Chan,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qi and Chen, Jixuan and Zhang, Kaiyi and Yu, Xinquan and Chan, Antoni B. and Huang, Hui},
  title     = {Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13626--13635},
}
Instruction-Guided Lesion Segmentation for Chest X-rays with Automatically Generated Large-Scale Dataset: Geon Choi,

Hangyul Yoon,

Hyunju Shin,

Hyunki Park,

Sang Hoon Seo,

Eunho Yang,

Edward Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Geon and Yoon, Hangyul and Shin, Hyunju and Park, Hyunki and Seo, Sang Hoon and Yang, Eunho and Choi, Edward},
  title     = {Instruction-Guided Lesion Segmentation for Chest {X-rays} with Automatically Generated Large-Scale Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1482--1492},
}
Lifting Unlabeled Internet-level Data for 3D Scene Understanding: Yixin Chen,

Yaowei Zhang,

Huangyue Yu,

Junchao He,

Yan Wang,

Jiangyong Huang,

Hongyu Shen,

Junfeng Ni,

Shaofei Wang,

Baoxiong Jia,

Song-Chun Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yixin and Zhang, Yaowei and Yu, Huangyue and He, Junchao and Wang, Yan and Huang, Jiangyong and Shen, Hongyu and Ni, Junfeng and Wang, Shaofei and Jia, Baoxiong and Zhu, Song-Chun and Huang, Siyuan},
  title     = {Lifting Unlabeled Internet-level Data for {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5814--5827},
}
AudioAvatar: Personalized Audio-driven Whole-body Talking Avatars: Seungeun Lee,

SeungJun Moon,

Hah Min Lew,

Ji-Su Kang,

Gyeong-Moon Park; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seungeun and Moon, SeungJun and Lew, Hah Min and Kang, Ji-Su and Park, Gyeong-Moon},
  title     = {{AudioAvatar}: Personalized Audio-driven Whole-body Talking Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3998--4010},
}
GUIDE: A Benchmark for Understanding and Assisting Users in Open-Ended GUI Tasks: Saelyne Yang,

Jaesang Yu,

Yi-Hao Peng,

Kevin Qinghong Lin,

Jae Won Cho,

Yale Song,

Juho Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Saelyne and Yu, Jaesang and Peng, Yi-Hao and Lin, Kevin Qinghong and Cho, Jae Won and Song, Yale and Kim, Juho},
  title     = {{GUIDE}: A Benchmark for Understanding and Assisting Users in Open-Ended {GUI} Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13017--13027},
}
Yume1.5: A Text-Controlled Interactive World Generation Model: Xiaofeng Mao,

Zhen Li,

Chuanhao Li,

Xiaojie Xu,

Kaining Ying,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Xiaofeng and Li, Zhen and Li, Chuanhao and Xu, Xiaojie and Ying, Kaining and Zhang, Kaipeng},
  title     = {Yume1.5: A Text-Controlled Interactive World Generation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7752--7761},
}
UST-Hand: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for 3D Self-supervised Hand Pose Estimation: Tianhao Han,

Haoyang Zhang,

Liang Xie,

Haochen Chang,

Kun Gao,

Yuan Cheng,

Pengfei Ren,

Erwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chang, Haochen and Gao, Kun and Cheng, Yuan and Ren, Pengfei and Yin, Erwei},
  title     = {{UST-Hand}: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for {3D} Self-supervised Hand Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8857--8867},
}
Monet: Reasoning in Latent Visual Space Beyond Image and Language: Qixun Wang,

Yang Shi,

Yifei Wang,

Yuanxing Zhang,

Pengfei Wan,

Kun Gai,

Xianghua Ying,

Yisen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qixun and Shi, Yang and Wang, Yifei and Zhang, Yuanxing and Wan, Pengfei and Gai, Kun and Ying, Xianghua and Wang, Yisen},
  title     = {Monet: Reasoning in Latent Visual Space Beyond Image and Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12030--12040},
}
GLINT: Modeling Scene-Scale Transparency via Gaussian Radiance Transport: Youngju Na,

Jaeseong Yun,

Soohyun Ryu,

Hyunsu Kim,

Sung-Eui Yoon,

Suyong Yeon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Na_2026_CVPR,
  author    = {Na, Youngju and Yun, Jaeseong and Ryu, Soohyun and Kim, Hyunsu and Yoon, Sung-Eui and Yeon, Suyong},
  title     = {{GLINT}: Modeling Scene-Scale Transparency via {Gaussian} Radiance Transport},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7256--7265},
}
CoCoVideo: The High-Quality Commercial-Model-Based Contrastive Benchmark for AI-Generated Video Detection: Huidong Feng,

Wentao Chen,

Jie Chen,

Xinqi Cai,

Ruolong Ma,

Yinglin Zheng,

Yuxin Lin,

Ming Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Huidong and Chen, Wentao and Chen, Jie and Cai, Xinqi and Ma, Ruolong and Zheng, Yinglin and Lin, Yuxin and Zeng, Ming},
  title     = {{CoCoVideo}: The High-Quality Commercial-Model-Based Contrastive Benchmark for {AI}-Generated Video Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11346--11356},
}
When to Think and When to Look: Uncertainty-Guided Lookback: Jing Bi,

Filippos Bellos,

Junjia Guo,

Yayuan Li,

Chao Huang,

Yunlong Tang,

Luchuan Song,

Susan Liang,

Zhongfei Zhang,

Jason J. Corso,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
  author    = {Bi, Jing and Bellos, Filippos and Guo, Junjia and Li, Yayuan and Huang, Chao and Tang, Yunlong and Song, Luchuan and Liang, Susan and Zhang, Zhongfei and Corso, Jason J. and Xu, Chenliang},
  title     = {When to Think and When to Look: Uncertainty-Guided Lookback},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5104--5113},
}
MangoBench: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning: Yi Wang,

Ningze Zhong,

Zhiheng Fu,

Longguang Wang,

Ye Zhang,

Yulan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yi and Zhong, Ningze and Fu, Zhiheng and Wang, Longguang and Zhang, Ye and Guo, Yulan},
  title     = {{MangoBench}: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6219--6228},
}
ViLoMem: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory: Weihao Bo,

Shan Zhang,

Yanpeng Sun,

Jingjing Wu,

Qunyi Xie,

Xiao Tan,

Kunbin Chen,

Wei He,

Xiaofan Li,

Na Zhao,

Jingdong Wang,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Weihao and Zhang, Shan and Sun, Yanpeng and Wu, Jingjing and Xie, Qunyi and Tan, Xiao and Chen, Kunbin and He, Wei and Li, Xiaofan and Zhao, Na and Wang, Jingdong and Li, Zechao},
  title     = {{ViLoMem}: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5476--5486},
}
Learnability-Driven Submodular Optimization for Active Roadside 3D Detection: Ruiyu Mao,

Baoming Zhang,

Nicholas Ruozzi,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Ruiyu and Zhang, Baoming and Ruozzi, Nicholas and Guo, Yunhui},
  title     = {Learnability-Driven Submodular Optimization for Active Roadside {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11579--11588},
}
Tea-Adapter: Teacher Adapter for Efficient Conditional Generation: Yinhan Zhang,

Yue Ma,

Fangqiu Yi,

Chenyang Qi,

Chi Zhang,

Kunyu Feng,

Zeyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yinhan and Ma, Yue and Yi, Fangqiu and Qi, Chenyang and Zhang, Chi and Feng, Kunyu and Wang, Zeyu},
  title     = {{Tea-Adapter}: Teacher Adapter for Efficient Conditional Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4805--4815},
}
Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning: Xinghao Wu,

Jianwei Niu,

Xuefeng Liu,

Guogang Zhu,

Jiayuan Zhang,

Shaojie Tang,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xinghao and Niu, Jianwei and Liu, Xuefeng and Zhu, Guogang and Zhang, Jiayuan and Tang, Shaojie and Chen, Wei},
  title     = {Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10313--10323},
}
Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table: Han Jiang,

Haoyu Tang,

Xiaoxuan Mu,

Chen Li,

Jihua Zhu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Han and Tang, Haoyu and Mu, Xiaoxuan and Li, Chen and Zhu, Jihua},
  title     = {Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9762--9772},
}
TRCoRSurg: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition: Fang Li,

Shihao Zou,

Weixin Si,

Yang Gao,

Shuai Li,

Aimin Hao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Fang and Zou, Shihao and Si, Weixin and Gao, Yang and Li, Shuai and Hao, Aimin},
  title     = {{TRCoRSurg}: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2811--2820},
}
Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs: Muhammad Kamran Janjua,

Hugo Silva,

Di Niu,

Bahador Rashidi; [pdf] [supp]
[bibtex]
@InProceedings{Janjua_2026_CVPR,
  author    = {Janjua, Muhammad Kamran and Silva, Hugo and Niu, Di and Rashidi, Bahador},
  title     = {Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5165--5174},
}
Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation: Yunbei Zhang,

Chengyi Cai,

Feng Liu,

Jihun Hamm; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yunbei and Cai, Chengyi and Liu, Feng and Hamm, Jihun},
  title     = {Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6176--6187},
}
Clothe and Pose: Nakul Sharma,

Aayush Bansal,

Minh Vo; [pdf] [supp]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Nakul and Bansal, Aayush and Vo, Minh},
  title     = {Clothe and Pose},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2015--2024},
}
FAVE: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal LLMs: Weiheng Lu,

An Yu,

Jian Li,

Zhenfei Zhang,

Felix X.-F. Ye,

Ming-Ching Chang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Weiheng and Yu, An and Li, Jian and Zhang, Zhenfei and Ye, Felix X.-F. and Chang, Ming-Ching},
  title     = {{FAVE}: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1651--1660},
}
Bootstrap Dynamic-Aware 3D Visual Representation for Scalable Robot Learning: Qiwei Liang,

Boyang Cai,

Minghao Lai,

Sitong Zhuang,

Tao Lin,

Yan Qin,

Yixuan Ye,

Jiaming Liang,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Qiwei and Cai, Boyang and Lai, Minghao and Zhuang, Sitong and Lin, Tao and Qin, Yan and Ye, Yixuan and Liang, Jiaming and Xu, Renjing},
  title     = {Bootstrap Dynamic-Aware {3D} Visual Representation for Scalable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13419--13429},
}
Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees: Arya Fayyazi,

Haleh Akrami; [pdf] [arXiv]
[bibtex]
@InProceedings{Fayyazi_2026_CVPR,
  author    = {Fayyazi, Arya and Akrami, Haleh},
  title     = {Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5144--5153},
}
Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos: Yujin Ham,

Junho Kim,

Vivek Boominathan,

Guha Balakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ham_2026_CVPR,
  author    = {Ham, Yujin and Kim, Junho and Boominathan, Vivek and Balakrishnan, Guha},
  title     = {Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4322--4331},
}
RHCNet: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection: Yueying Wang,

Yiteng Guo,

Weidong Zhang,

Jie Wen,

Liquan Shen,

Huaicheng Yan,

Xin Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yueying and Guo, Yiteng and Zhang, Weidong and Wen, Jie and Shen, Liquan and Yan, Huaicheng and Xu, Xin},
  title     = {{RHCNet}: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4393--4402},
}
Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization: Hongyu Zhang,

Haipeng Chen,

Zhimin Xu,

Chengxin Yang,

Yingda Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hongyu and Chen, Haipeng and Xu, Zhimin and Yang, Chengxin and Lyu, Yingda},
  title     = {Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1461--1471},
}
CLIPoint3D: Language-Grounded Few-Shot Unsupervised 3D Point Cloud Domain Adaptation: Mainak Singha,

Sarthak Mehrotra,

Paolo Casari,

Subhasis Chaudhuri,

Elisa Ricci,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singha_2026_CVPR,
  author    = {Singha, Mainak and Mehrotra, Sarthak and Casari, Paolo and Chaudhuri, Subhasis and Ricci, Elisa and Banerjee, Biplab},
  title     = {{CLIPoint3D}: Language-Grounded Few-Shot Unsupervised {3D} Point Cloud Domain Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9582--9592},
}
TimeLens: Rethinking Video Temporal Grounding with Multimodal LLMs: Jun Zhang,

Teng Wang,

Yuying Ge,

Yixiao Ge,

Xinhao Li,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jun and Wang, Teng and Ge, Yuying and Ge, Yixiao and Li, Xinhao and Wang, Limin},
  title     = {{TimeLens}: Rethinking Video Temporal Grounding with Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10419--10429},
}
HypeVPR: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition: Suhan Woo,

Seongwon Lee,

Jinwoo Jang,

Euntai Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Suhan and Lee, Seongwon and Jang, Jinwoo and Kim, Euntai},
  title     = {{HypeVPR}: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12183--12192},
}
VGA: Empowering Aerial-Ground Localization by Visual Geometry Alignment: Tao Jun Lin,

Yujiao Shi,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Tao Jun and Shi, Yujiao and Li, Hongdong},
  title     = {{VGA}: Empowering Aerial-Ground Localization by Visual Geometry Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5409--5420},
}
AwareVLN: Reasoning with Self-awareness for Vision-Language Navigation: Wenxuan Guo,

Xiuwei Xu,

Yichen Liu,

Xiangyu Li,

Hang Yin,

Huangxing Chen,

Wenzhao Zheng,

Jianjiang Feng,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Liu, Yichen and Li, Xiangyu and Yin, Hang and Chen, Huangxing and Zheng, Wenzhao and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {{AwareVLN}: Reasoning with Self-awareness for Vision-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4065--4075},
}
Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios: Zhipeng Sui,

Haiqing Hao,

Weihua He,

Seng-Hong Lee,

Wenhui Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sui_2026_CVPR,
  author    = {Sui, Zhipeng and Hao, Haiqing and He, Weihua and Lee, Seng-Hong and Wang, Wenhui},
  title     = {Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {946--955},
}
RS-SSM: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation: Kai Zhu,

Zhenyu Cui,

Zehua Zang,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kai and Cui, Zhenyu and Zang, Zehua and Zhou, Jiahuan},
  title     = {{RS-SSM}: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10741--10752},
}
Re-Align: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing: Runze He,

Yiji Cheng,

Tiankai Hang,

Zhimin Li,

Yu Xu,

Zijin Yin,

Shiyi Zhang,

Wenxun Dai,

Penghui Du,

Ao Ma,

Chunyu Wang,

Qinglin Lu,

Jizhong Han,

Jiao Dai; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Runze and Cheng, Yiji and Hang, Tiankai and Li, Zhimin and Xu, Yu and Yin, Zijin and Zhang, Shiyi and Dai, Wenxun and Du, Penghui and Ma, Ao and Wang, Chunyu and Lu, Qinglin and Han, Jizhong and Dai, Jiao},
  title     = {{Re-Align}: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9051--9062},
}
PositionIC: Unified Position and Identity Consistency for Image Customization: Junjie Hu,

Tianyang Han,

Kai Ma,

Jialin Gao,

Yang Song,

Xianhua He,

Junfeng Luo,

Xiaoming Wei,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Junjie and Han, Tianyang and Ma, Kai and Gao, Jialin and Song, Yang and He, Xianhua and Luo, Junfeng and Wei, Xiaoming and Zhang, Wenqiang},
  title     = {{PositionIC}: Unified Position and Identity Consistency for Image Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9139--9148},
}
CLIP Is Shortsighted: Paying Attention Beyond the First Sentence: Marc-Antoine Lavoie,

Anas Mahmoud,

Aldo Zaimi,

Arsene Fansi Tchango,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lavoie_2026_CVPR,
  author    = {Lavoie, Marc-Antoine and Mahmoud, Anas and Zaimi, Aldo and Tchango, Arsene Fansi and Waslander, Steven L.},
  title     = {{CLIP} Is Shortsighted: Paying Attention Beyond the First Sentence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9524--9534},
}
Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks: Ngoc-Bao Nguyen,

Sy-Tuyen Ho,

Koh Jun Hao,

Ngai-Man Cheung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Ngoc-Bao and Ho, Sy-Tuyen and Hao, Koh Jun and Cheung, Ngai-Man},
  title     = {Do Vision-Language Models Leak What They Learn? {Adaptive} Token-Weighted Model Inversion Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10283--10292},
}
Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization: Jeonggon Kim,

Heejoon Moon,

Je Hyeong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jeonggon and Moon, Heejoon and Hong, Je Hyeong},
  title     = {Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {113--123},
}
Sparsity-Aware Voxel Attention and Foreground Modulation for 3D Semantic Scene Completion: Yu Xue,

Longjun Gao,

Yuanqi Su,

HaoAng Lu,

Xiaoning Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Yu and Gao, Longjun and Su, Yuanqi and Lu, HaoAng and Zhang, Xiaoning},
  title     = {Sparsity-Aware Voxel Attention and Foreground Modulation for {3D} Semantic Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5751--5761},
}
TV2TV: A Unified Framework for Interleaved Language and Video Generation: Xiaochuang Han,

Youssef Emad,

Melissa Hall,

John Nguyen,

Karthik Padthe,

Liam Robbins,

Amir Bar,

Delong Chen,

Michal Drozdzal,

Maha Elbayad,

Yushi Hu,

Shang-Wen Li,

Jakob Verbeek,

XuDong Wang,

Marjan Ghazvininejad,

Luke Zettlemoyer,

Emily Dinan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Xiaochuang and Emad, Youssef and Hall, Melissa and Nguyen, John and Padthe, Karthik and Robbins, Liam and Bar, Amir and Chen, Delong and Drozdzal, Michal and Elbayad, Maha and Hu, Yushi and Li, Shang-Wen and Verbeek, Jakob and Wang, XuDong and Ghazvininejad, Marjan and Zettlemoyer, Luke and Dinan, Emily},
  title     = {{TV2TV}: A Unified Framework for Interleaved Language and Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7695--7706},
}
Pose-Free Omnidirectional Gaussian Splatting for 360-Degree Videos with Consistent Depth Priors: Chuanqing Zhuang,

Xin Lu,

Zehui Deng,

Zhengda Lu,

Yiqun Wang,

Junqi Diao,

Jun Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Chuanqing and Lu, Xin and Deng, Zehui and Lu, Zhengda and Wang, Yiqun and Diao, Junqi and Xiao, Jun},
  title     = {Pose-Free Omnidirectional {Gaussian} Splatting for 360-Degree Videos with Consistent Depth Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4881--4890},
}
ObjectMorpher: 3D-Aware Image Editing via Deformable 3DGS: Yuhuan Xie,

Aoxuan Pan,

Yi-Hua Huang,

Chirui Chang,

Peng Dai,

Xin Yu,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yuhuan and Pan, Aoxuan and Huang, Yi-Hua and Chang, Chirui and Dai, Peng and Yu, Xin and Qi, Xiaojuan},
  title     = {{ObjectMorpher}: {3D}-Aware Image Editing via Deformable {3DGS}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5828--5838},
}
SafeRoPE: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers: Xiang Yang,

Feifei Li,

Mi Zhang,

Geng Hong,

Xiaoyu You,

Min Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xiang and Li, Feifei and Zhang, Mi and Hong, Geng and You, Xiaoyu and Yang, Min},
  title     = {{SafeRoPE}: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {690--700},
}
Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation: Sidan Zhu,

Hongteng Xu,

Dixin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Sidan and Xu, Hongteng and Luo, Dixin},
  title     = {Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7684--7694},
}
GenErase: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models: Korada Sri Vardhana,

Soma Biswas; [pdf] [supp]
[bibtex]
@InProceedings{Vardhana_2026_CVPR,
  author    = {Vardhana, Korada Sri and Biswas, Soma},
  title     = {{GenErase}: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2327--2335},
}
RawMetaDiff: Unlocking Extreme Darkness from Dual-Exposure RAW with Meta-Guided Diffusion: Panjun Liu,

Jiyuan Xia,

Yuanshen Guan,

Yong Li,

Zhiqiang Lang,

Ruikang Xu,

Chang Chen,

Dehua Song,

Fenglong Song,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Panjun and Xia, Jiyuan and Guan, Yuanshen and Li, Yong and Lang, Zhiqiang and Xu, Ruikang and Chen, Chang and Song, Dehua and Song, Fenglong and Xiong, Zhiwei},
  title     = {{RawMetaDiff}: Unlocking Extreme Darkness from Dual-Exposure {RAW} with Meta-Guided Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5617--5626},
}
NimbusGS: Unified 3D Scene Reconstruction under Hybrid Weather: Yanying Li,

Jinyang Li,

Shengfeng He,

Yangyang Xu,

Junyu Dong,

Yong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanying and Li, Jinyang and He, Shengfeng and Xu, Yangyang and Dong, Junyu and Du, Yong},
  title     = {{NimbusGS}: Unified {3D} Scene Reconstruction under Hybrid Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5038--5048},
}
FinPercep-RM: A Fine-grained Reward Model and Co-evolutionary Curriculum for RL-based Real-world Super-Resolution: Yidi Liu,

Zihao Fan,

Jie Huang,

Jie Xiao,

Dong Li,

Wenlong Zhang,

Lei Bai,

Xueyang Fu,

Zheng-jun Zha; [pdf] [supp] [arXiv]
[bibtex]
% Author "Bai, Lei" was exported in all caps ("BAI, LEI"); normalised to regular name casing.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yidi and Fan, Zihao and Huang, Jie and Xiao, Jie and Li, Dong and Zhang, Wenlong and Bai, Lei and Fu, Xueyang and Zha, Zheng-jun},
  title     = {{FinPercep-RM}: A Fine-grained Reward Model and Co-evolutionary Curriculum for {RL}-based Real-world Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4839--4849},
}
PrivateEyes: Gaze-Preserving Anonymization for Data Sharing: Surabhi Gupta,

Dinesh Prabhu Muthumariappan,

Biplab Das,

Anoop Kolar Rajagopal,

Kiran Nanjunda Iyer,

Donghwan Seo; [pdf] [supp]
[bibtex]
@InProceedings{Gupta_2026_CVPR,
  author    = {Gupta, Surabhi and Muthumariappan, Dinesh Prabhu and Das, Biplab and Rajagopal, Anoop Kolar and Iyer, Kiran Nanjunda and Seo, Donghwan},
  title     = {{PrivateEyes}: Gaze-Preserving Anonymization for Data Sharing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3274--3283},
}
All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random Bayesian Inference: Yi Yu,

Libing Wu,

Zhuangzhuang Zhang,

Jing Qiu,

Lijuan Huo,

Jiaqi Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yi and Wu, Libing and Zhang, Zhuangzhuang and Qiu, Jing and Huo, Lijuan and Feng, Jiaqi},
  title     = {All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random {Bayesian} Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6549--6558},
}
C-LaV: Conditional Latent Velocity Field Denoising for Weather-Robust LiDAR Place Recognition: Xuewei Cao,

Jiayue Yang,

Zhiwen Zeng,

Yanyong Zhang,

Yan Xia; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Xuewei and Yang, Jiayue and Zeng, Zhiwen and Zhang, Yanyong and Xia, Yan},
  title     = {{C-LaV}: Conditional Latent Velocity Field Denoising for Weather-Robust {LiDAR} Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2928--2937},
}
Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models: Jingchen Sun,

Shaobo Han,

Deep Patel,

Wataru Kohno,

Can Jin,

Changyou Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jingchen and Han, Shaobo and Patel, Deep and Kohno, Wataru and Jin, Can and Chen, Changyou},
  title     = {Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5585--5595},
}
GUI-SAGE: Enhancing GUI Automation with Self-Explanatory Learning: Fei Tang,

Zhangxuan Gu,

Zhengxi Lu,

Shangzhan Zhang,

Zhengwen Zeng,

Shuheng Shen,

Changhua Meng,

Yuchen Yan,

Wenqi Zhang,

Yongliang Shen,

Weiming Lu,

Yueting Zhuang; [pdf]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Fei and Gu, Zhangxuan and Lu, Zhengxi and Zhang, Shangzhan and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yan, Yuchen and Zhang, Wenqi and Shen, Yongliang and Lu, Weiming and Zhuang, Yueting},
  title     = {{GUI-SAGE}: Enhancing {GUI} Automation with Self-Explanatory Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13007--13016},
}
A3: Towards Advertising Aesthetic Assessment: Kaiyuan Ji,

Yixuan Gao,

Lu Sun,

Yushuo Zheng,

Zijian Chen,

Jianbo Zhang,

Xiangyang Zhu,

Yuan Tian,

Zicheng Zhang,

Guangtao Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Kaiyuan and Gao, Yixuan and Sun, Lu and Zheng, Yushuo and Chen, Zijian and Zhang, Jianbo and Zhu, Xiangyang and Tian, Yuan and Zhang, Zicheng and Zhai, Guangtao},
  title     = {A3: Towards Advertising Aesthetic Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9478--9490},
}
SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models: Sofian Chaybouti,

Sanath Narayan,

Yasser Dahou,

Phúc H. Lê Khắc,

Ankit Singh,

Ngoc Huynh,

Wamiq Reyaz Para,

Hilde Kuehne,

Hakim Hacid; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the fourth author's family name is entered as the compound "Le Khac" (with diacritics),
% not just its last word; confirm against the author's preferred citation form.
% Accent commands are brace-wrapped so BibTeX treats each accented letter as a single character.
@InProceedings{Chaybouti_2026_CVPR,
  author    = {Chaybouti, Sofian and Narayan, Sanath and Dahou, Yasser and L{\^e} Khắc, Ph{\'u}c H. and Singh, Ankit and Huynh, Ngoc and Para, Wamiq Reyaz and Kuehne, Hilde and Hacid, Hakim},
  title     = {{SigLino}: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10071--10081},
}
FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control: Zhiyuan Zhang,

Can Wang,

Dongdong Chen,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhiyuan and Wang, Can and Chen, Dongdong and Liao, Jing},
  title     = {{FlexTraj}: Image-to-Video Generation with Flexible Point Trajectory Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4221--4231},
}
Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration: Qiaojie Zheng,

Jiucai Zhang,

Amy Zhang,

Xiaoli Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Qiaojie and Zhang, Jiucai and Zhang, Amy and Zhang, Xiaoli},
  title     = {Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13793--13801},
}
Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces: Depanshu Sani,

Saket Anand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sani_2026_CVPR,
  author    = {Sani, Depanshu and Anand, Saket},
  title     = {{Hier-COS}: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11263--11272},
}
CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion: Yushan Han,

Hui Zhang,

Qiming Xia,

Yi Jin,

Yidong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Yushan and Zhang, Hui and Xia, Qiming and Jin, Yi and Li, Yidong},
  title     = {{CoLC}: Communication-Efficient Collaborative Perception with {LiDAR} Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2983--2992},
}
Consistent Instance Field for Dynamic Scene Understanding: Junyi Wu,

Van Nguyen Nguyen,

Benjamin Planche,

Jiachen Tao,

Changchang Sun,

Zhongpai Gao,

Zhenghao Zhao,

Anwesa Choudhuri,

Gengyu Zhang,

Meng Zheng,

Feiran Wang,

Terrence Chen,

Yan Yan,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Junyi and Nguyen, Van Nguyen and Planche, Benjamin and Tao, Jiachen and Sun, Changchang and Gao, Zhongpai and Zhao, Zhenghao and Choudhuri, Anwesa and Zhang, Gengyu and Zheng, Meng and Wang, Feiran and Chen, Terrence and Yan, Yan and Wu, Ziyan},
  title     = {Consistent Instance Field for Dynamic Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3783--3793},
}
FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips: Mengtian Li,

Kunyan Dai,

Yi Ding,

Ruobing Ni,

Ying Zhang,

Wenwu Wang,

Zhifeng Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengtian and Dai, Kunyan and Ding, Yi and Ni, Ruobing and Zhang, Ying and Wang, Wenwu and Xie, Zhifeng},
  title     = {{FoleyDesigner}: Immersive Stereo {Foley} Generation with Precise Spatio-Temporal Alignment for Film Clips},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4291--4300},
}
Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval: Hao Sun,

Yadong Huo,

Qibing Qin,

Wenfeng Zhang,

Lei Huang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Hao and Huo, Yadong and Qin, Qibing and Zhang, Wenfeng and Huang, Lei},
  title     = {Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2671--2681},
}
UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation: Haopeng Chen,

Yihao Ai,

Kabeen Kim,

Robby T. Tan,

Yixin Chen,

Bo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haopeng and Ai, Yihao and Kim, Kabeen and Tan, Robby T. and Chen, Yixin and Wang, Bo},
  title     = {{UDAPose}: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13781--13792},
}
GVIS: Generative Vector Image Steganography: Zihao Xu,

Dawei Xu,

Zihan Li,

Xixi Zheng,

Chuan Zhang; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zihao and Xu, Dawei and Li, Zihan and Zheng, Xixi and Zhang, Chuan},
  title     = {{GVIS}: Generative Vector Image Steganography},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9384--9393},
}
Adapting Point Cloud Analysis via Multimodal Bayesian Distribution Learning: Xingyu Zhu,

Liang Yi,

Shuo Wang,

Wenbo Zhu,

Yongliang Wu,

Beier Zhu,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xingyu and Yi, Liang and Wang, Shuo and Zhu, Wenbo and Wu, Yongliang and Zhu, Beier and Zhang, Hanwang},
  title     = {Adapting Point Cloud Analysis via Multimodal {Bayesian} Distribution Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9976--9985},
}
MOSAIC-GS: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments: Svitlana Morkva,

Vaishakh Patil,

Alessio Tonioni,

Michael Oechsle,

Maximum Wilder-Smith,

Marco Hutter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morkva_2026_CVPR,
  author    = {Morkva, Svitlana and Patil, Vaishakh and Tonioni, Alessio and Oechsle, Michael and Wilder-Smith, Maximum and Hutter, Marco},
  title     = {{MOSAIC-GS}: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1167--1176},
}
Physically Inspired Gaussian Splatting for HDR Novel View Synthesis: Huimin Zeng,

Yue Bai,

Hailing Wang,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Huimin and Bai, Yue and Wang, Hailing and Fu, Yun},
  title     = {Physically Inspired {Gaussian} Splatting for {HDR} Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11808--11817},
}
Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation: Yingkai Yang,

Chaoqi Chen,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yingkai and Chen, Chaoqi and Huang, Hui},
  title     = {Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7957--7966},
}
ManifoldNeuS: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction: Xinxin Liu,

Xue Wang,

Guoqing Zhou,

Qing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinxin and Wang, Xue and Zhou, Guoqing and Wang, Qing},
  title     = {{ManifoldNeuS}: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {261--271},
}
Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors: Zhongze Wu,

Xiu Su,

Feng Yang,

Dan Niu,

Shan You,

Yueyi Luo,

Jun Long; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhongze and Su, Xiu and Yang, Feng and Niu, Dan and You, Shan and Luo, Yueyi and Long, Jun},
  title     = {Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6271--6281},
}
rPPG-VQA: A Video Quality Assessment Framework for Unsupervised rPPG Training: Tianyang Dai,

Ming Chang,

Yan Chen,

Yang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Tianyang and Chang, Ming and Chen, Yan and Hu, Yang},
  title     = {{rPPG-VQA}: A Video Quality Assessment Framework for Unsupervised {rPPG} Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1365--1375},
}
MatMart: Material Reconstruction of 3D Objects via Diffusion: Xiuchao Wu,

Pengfei Zhu,

Jiangjing Lyu,

Xinguo Liu,

Jie Guo,

Yanwen Guo,

Weiwei Xu,

Chengfei Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiuchao and Zhu, Pengfei and Lyu, Jiangjing and Liu, Xinguo and Guo, Jie and Guo, Yanwen and Xu, Weiwei and Lyu, Chengfei},
  title     = {{MatMart}: Material Reconstruction of {3D} Objects via Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2336--2345},
}
No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors: Tao Liu,

Kan Ren,

Gang Wan,

Shibo Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tao and Ren, Kan and Wan, Gang and Wen, Shibo},
  title     = {No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6868--6877},
}
LLaMo: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens: Zekun Li,

Sizhe An,

Chengcheng Tang,

Chuan Guo,

Ivan Shugurov,

Linguang Zhang,

Amy Zhao,

Srinath Sridhar,

Lingling Tao,

Abhay Mittal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zekun and An, Sizhe and Tang, Chengcheng and Guo, Chuan and Shugurov, Ivan and Zhang, Linguang and Zhao, Amy and Sridhar, Srinath and Tao, Lingling and Mittal, Abhay},
  title     = {{LLaMo}: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2209--2220},
}
Conditional Factuality Controlled LLMs with Generalization Certificates via Conformal Sampling: Kai Ye,

Qingtao Pan,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Kai and Pan, Qingtao and Li, Shuo},
  title     = {Conditional Factuality Controlled {LLMs} with Generalization Certificates via Conformal Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3627--3635},
}
Stable and Efficient Single-Rollout RL for Multimodal Reasoning: Rui Liu,

Dian Yu,

Lei Ke,

Haolin Liu,

Yujun Zhou,

Zhenwen Liang,

Haitao Mi,

Pratap Tokekar,

Dong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Rui and Yu, Dian and Ke, Lei and Liu, Haolin and Zhou, Yujun and Liang, Zhenwen and Mi, Haitao and Tokekar, Pratap and Yu, Dong},
  title     = {Stable and Efficient Single-Rollout {RL} for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12009--12018},
}
From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal LLMs: Le Zhang,

Jihan Yang,

Soundarya Krishnan,

Jimit Majmudar,

Xiou Ge,

Prasoon Puri,

Prathamesh Saraf,

Shruti Bhargava,

Dhivya Piraviperumal,

Yinan Ling,

Cindy Pan,

Hong Yu,

Aishwarya Agrawal,

Bo-Hsiang Tseng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Le and Yang, Jihan and Krishnan, Soundarya and Majmudar, Jimit and Ge, Xiou and Puri, Prasoon and Saraf, Prathamesh and Bhargava, Shruti and Piraviperumal, Dhivya and Ling, Yinan and Pan, Cindy and Yu, Hong and Agrawal, Aishwarya and Tseng, Bo-Hsiang},
  title     = {From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12052--12063},
}
Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention: Koichiro Ito; [pdf] [supp]
[bibtex]
@InProceedings{Ito_2026_CVPR,
  author    = {Ito, Koichiro},
  title     = {Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9501--9511},
}
SparseSplat: Towards Applicable Feed-Forward 3D Gaussian Splatting with Pixel-Unaligned Prediction: Zicheng Zhang,

Xiangting Meng,

Ke Wu,

Wenchao Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zicheng and Meng, Xiangting and Wu, Ke and Ding, Wenchao},
  title     = {{SparseSplat}: Towards Applicable Feed-Forward {3D} {Gaussian} Splatting with Pixel-Unaligned Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5049--5058},
}
E2EGS: Event-to-Edge Gaussian Splatting for Pose-Free 3D Reconstruction: Yunsoo Kim,

Changki Sung,

Dasol Hong,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Yunsoo and Sung, Changki and Hong, Dasol and Myung, Hyun},
  title     = {{E2EGS}: Event-to-Edge {Gaussian} Splatting for Pose-Free {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4922--4931},
}
Revisiting Optimal Coding for I-ToF under Practical Sensor Constraints: Wenbin Luo,

Takafumi Iwaguchi,

Ryusuke Sagawa,

Hiroshi Kawasaki; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wenbin and Iwaguchi, Takafumi and Sagawa, Ryusuke and Kawasaki, Hiroshi},
  title     = {Revisiting Optimal Coding for {I-ToF} under Practical Sensor Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12501--12510},
}
TrackMAE: Video Representation Learning via Track Mask and Predict: Renaud Vandeghen,

Fida Mohammad Thoker,

Marc Van Droogenbroeck,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vandeghen_2026_CVPR,
  author    = {Vandeghen, Renaud and Thoker, Fida Mohammad and Van Droogenbroeck, Marc and Ghanem, Bernard},
  title     = {{TrackMAE}: Video Representation Learning via Track Mask and Predict},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13604--13614},
}
Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model: Xulun Ye,

Benyu Wu,

Jie Hong,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Wu, Benyu and Hong, Jie and Zhou, Kun},
  title     = {Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2434--2444},
}
FreeScale: Scaling 3D Scenes via Certainty-Aware Free-View Generation: Chenhan Jiang,

Yu Chen,

Qingwen Zhang,

Jifei Song,

Songcen Xu,

Dit-Yan Yeung,

Jiankang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Chenhan and Chen, Yu and Zhang, Qingwen and Song, Jifei and Xu, Songcen and Yeung, Dit-Yan and Deng, Jiankang},
  title     = {{FreeScale}: Scaling {3D} Scenes via Certainty-Aware Free-View Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {330--340},
}
CARE-Edit: Condition-Aware Routing of Experts for Contextual Image Editing: Yucheng Wang,

Zedong Wang,

Yuetong Wu,

Yue Ma,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yucheng and Wang, Zedong and Wu, Yuetong and Ma, Yue and Xu, Dan},
  title     = {{CARE-Edit}: Condition-Aware Routing of Experts for Contextual Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9019--9028},
}
PGA: Prior-free Generative Attack for Practical No-box Scenario: Hongyu Peng,

Xiang Yuan,

Gong Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Hongyu and Yuan, Xiang and Cheng, Gong},
  title     = {{PGA}: Prior-free Generative Attack for Practical No-box Scenario},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13296--13305},
}
Translating Signals to Languages for sEMG-Based Activity Recognition: Ming Wang,

Haoxuan Qu,

Qiuhong Ke,

Wei Zhou,

Hossein Rahmani,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ming and Qu, Haoxuan and Ke, Qiuhong and Zhou, Wei and Rahmani, Hossein and Liu, Jun},
  title     = {Translating Signals to Languages for {sEMG}-Based Activity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9317--9329},
}
Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors: Yingjie Feng,

Yi Wang,

Jiaze Wang,

Anfeng Liu,

Zhuotao Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yingjie and Wang, Yi and Wang, Jiaze and Liu, Anfeng and Tian, Zhuotao},
  title     = {Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6075--6084},
}
Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling: Sanghyeok Chu,

Pyunghwan Ahn,

Gwangmo Song,

Seung Hwan Kim,

Honglak Lee,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Sanghyeok and Ahn, Pyunghwan and Song, Gwangmo and Kim, Seung Hwan and Lee, Honglak and Han, Bohyung},
  title     = {Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11283--11292},
}
Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images: Donghai Fang,

Yongheng Li,

Zhen Wang,

Yuansong Zeng,

Wenwen Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Donghai and Li, Yongheng and Wang, Zhen and Zeng, Yuansong and Min, Wenwen},
  title     = {Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5720--5729},
}
OmniDocLayout: Towards Diverse Document Layout Generation via Coarse-to-Fine LLM Learning: Hengrui Kang,

Zhuangcheng Gu,

Zhiyuan Zhao,

Zichen Wen,

Bin Wang,

Weijia Li,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Hengrui and Gu, Zhuangcheng and Zhao, Zhiyuan and Wen, Zichen and Wang, Bin and Li, Weijia and He, Conghui},
  title     = {{OmniDocLayout}: Towards Diverse Document Layout Generation via Coarse-to-Fine {LLM} Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3208--3218},
}
Video Generation with Stable Transparency via Shiftable RGB-A Distribution Learner: Haotian Dong,

Wenjing Wang,

Chen Li,

Jing Lyu,

Di Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Haotian and Wang, Wenjing and Li, Chen and Lyu, Jing and Lin, Di},
  title     = {Video Generation with Stable Transparency via Shiftable {RGB-A} Distribution Learner},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1885--1894},
}
Preference-Aligned LoRA Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy: Wooseong Jeong,

Wonyoung Lee,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wooseong and Lee, Wonyoung and Yoon, Kuk-Jin},
  title     = {Preference-Aligned {LoRA} Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {823--835},
}
HanDyVQA: A Video QA Benchmark for Fine-Grained Hand-Object Interaction Dynamics: Masatoshi Tateno,

Gido Kato,

Hirokatsu Kataoka,

Yoichi Sato,

Takuma Yagi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tateno_2026_CVPR,
  author    = {Tateno, Masatoshi and Kato, Gido and Kataoka, Hirokatsu and Sato, Yoichi and Yagi, Takuma},
  title     = {{HanDyVQA}: A Video {QA} Benchmark for Fine-Grained Hand-Object Interaction Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3455--3465},
}
Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement: Kartik Patwari,

Noranart Vesdapunt,

Chien-Yi Wang,

Dawei Li,

Cong Phuoc Huynh,

Ning Zhou,

Chen-Nee Chuah,

Kah Kuen Fu; [pdf] [supp]
[bibtex]
@InProceedings{Patwari_2026_CVPR, author = {Patwari, Kartik and Vesdapunt, Noranart and Wang, Chien-Yi and Li, Dawei and Huynh, Cong Phuoc and Zhou, Ning and Chuah, Chen-Nee and Fu, Kah Kuen}, title = {Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13812-13823} }
SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network: Yang Wang,

Jiqing Zhang,

Chuanyu Sun,

Qianhui Liu,

Huilin Ge,

Ziqi Wei,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yang and Zhang, Jiqing and Sun, Chuanyu and Liu, Qianhui and Ge, Huilin and Wei, Ziqi and Yang, Xin}, title = {SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {926-935} }
Stabilizing Streaming Video Geometry via Dynamic Feature Normalization: Xiaoyang Lyu,

Muxin Liu,

Xiaoshan Wu,

Ruicheng Wang,

Yi-Hua Huang,

Yang-Tian Sun,

Shaoshuai Shi,

Xiaojuan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR, author = {Lyu, Xiaoyang and Liu, Muxin and Wu, Xiaoshan and Wang, Ruicheng and Huang, Yi-Hua and Sun, Yang-Tian and Shi, Shaoshuai and Qi, Xiaojuan}, title = {Stabilizing Streaming Video Geometry via Dynamic Feature Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7577-7587} }
PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting: Wubin Shi,

Shaoyan Gai,

Feipeng Da; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Wubin and Gai, Shaoyan and Da, Feipeng}, title = {PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4698-4707} }
Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization: Xuefei Wang,

Kai Horstmann,

Ethan Lin,

Jonathan Chen,

Alexander Farhang,

Sophia Stiles,

Atharva Sehgal,

Jonathan Light,

David Van Valen,

Yisong Yue,

Jennifer J. Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xuefei and Horstmann, Kai and Lin, Ethan and Chen, Jonathan and Farhang, Alexander and Stiles, Sophia and Sehgal, Atharva and Light, Jonathan and Van Valen, David and Yue, Yisong and Sun, Jennifer J.}, title = {Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13680-13690} }
SmokeSVD: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models: Chen Li,

Shanshan Dong,

Sheng Qiu,

Jianmin Han,

Yibo Zhao,

Zan Gao,

Taku Komura,

Kemeng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Chen and Dong, Shanshan and Qiu, Sheng and Han, Jianmin and Zhao, Yibo and Gao, Zan and Komura, Taku and Huang, Kemeng}, title = {SmokeSVD: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7414-7424} }
FedDAP: Domain-Aware Prototype Learning for Federated Learning under Domain Shift: Huy Q. Le,

Loc X. Nguyen,

Yu Qiao,

Seong Tae Kim,

Eui-Nam Huh,

Choong Seon Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2026_CVPR, author = {Le, Huy Q. and Nguyen, Loc X. and Qiao, Yu and Kim, Seong Tae and Huh, Eui-Nam and Hong, Choong Seon}, title = {FedDAP: Domain-Aware Prototype Learning for Federated Learning under Domain Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3390-3399} }
MambaLiteUNet: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation: Md Maklachur Rahman,

Soon Ki Jung,

Tracy Hammond; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahman_2026_CVPR, author = {Rahman, Md Maklachur and Jung, Soon Ki and Hammond, Tracy}, title = {MambaLiteUNet: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8556-8565} }
Global Underwater Geolocation from Time-Lapse Polarization Imagery: Sara Aghajanzadeh,

Xiaoyang Bai,

Zhongmin Zhu,

David Forsyth,

Viktor Gruev; [pdf] [supp]
[bibtex]
@InProceedings{Aghajanzadeh_2026_CVPR, author = {Aghajanzadeh, Sara and Bai, Xiaoyang and Zhu, Zhongmin and Forsyth, David and Gruev, Viktor}, title = {Global Underwater Geolocation from Time-Lapse Polarization Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6464-6473} }
Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery: Kaibing Yang,

Yucheng Wang,

Tingzhang Luo; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Kaibing and Wang, Yucheng and Luo, Tingzhang}, title = {Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11303-11312} }
MASQuant: Modality-Aware Smoothing Quantization for Multimodal Large Language Models: Lulu Hu,

Wenhu Xiao,

Xin Chen,

Xinhua Xu,

Bowen Xu,

Kun Li,

Yongliang Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Lulu and Xiao, Wenhu and Chen, Xin and Xu, Xinhua and Xu, Bowen and Li, Kun and Tao, Yongliang}, title = {MASQuant: Modality-Aware Smoothing Quantization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8707-8716} }
CRAFT-LoRA: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion: Yu Li,

Yujun Cai,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yu and Cai, Yujun and Zhang, Chi}, title = {CRAFT-LoRA: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7654-7663} }
ResiHMR: Residual-Limb Aware Single-Image 3D Human Mesh Recovery for Individuals with Limb Loss: Jiaying Ying,

Heming Du,

Kaihao Zhang,

Sean M. Tweedy,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2026_CVPR, author = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Tweedy, Sean M. and Yu, Xin}, title = {ResiHMR: Residual-Limb Aware Single-Image 3D Human Mesh Recovery for Individuals with Limb Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13940-13950} }
RoboWheel: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning: Yuhong Zhang,

Zihan Gao,

Shengpeng Li,

Ling-Hao Chen,

Kaisheng Liu,

Runqing Cheng,

Xiao Lin,

Junjia Liu,

Zhuoheng Li,

Jingyi Feng,

Ziyan He,

Jintian Lin,

Zheyan Huang,

Zhifang Liu,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuhong and Gao, Zihan and Li, Shengpeng and Chen, Ling-Hao and Liu, Kaisheng and Cheng, Runqing and Lin, Xiao and Liu, Junjia and Li, Zhuoheng and Feng, Jingyi and He, Ziyan and Lin, Jintian and Huang, Zheyan and Liu, Zhifang and Wang, Haoqian}, title = {RoboWheel: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6664-6674} }
Neural Distribution Prior for LiDAR Out-of-Distribution Detection: Zizhao Li,

Zhengkang Xiang,

Jiayang Ao,

Feng Liu,

Joseph West,

Kourosh Khoshelham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Zizhao and Xiang, Zhengkang and Ao, Jiayang and Liu, Feng and West, Joseph and Khoshelham, Kourosh}, title = {Neural Distribution Prior for LiDAR Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3035-3045} }
Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern: Xiaopei Zhu,

Guanning Zeng,

Zhanhao Hu,

Jun Zhu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xiaopei and Zeng, Guanning and Hu, Zhanhao and Zhu, Jun and Hu, Xiaolin}, title = {Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13356-13365} }
LazyVAR: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding: Rongge Mao,

Chengqi Dong,

S Kevin Zhou; [pdf]
[bibtex]
@InProceedings{Mao_2026_CVPR, author = {Mao, Rongge and Dong, Chengqi and Zhou, S Kevin}, title = {LazyVAR: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12129-12139} }
HiSpatial: Taming Hierarchical 3D Spatial Understanding in Vision-Language Models: Huizhi Liang,

Yichao Shen,

Yu Deng,

Sicheng Xu,

ZhiYuan Feng,

Tong Zhang,

Yaobo Liang,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Huizhi and Shen, Yichao and Deng, Yu and Xu, Sicheng and Feng, ZhiYuan and Zhang, Tong and Liang, Yaobo and Yang, Jiaolong}, title = {HiSpatial: Taming Hierarchical 3D Spatial Understanding in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2502-2514} }
EmoThinker: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning: Qinfu Xu,

Liyuan Pan,

Yiwei Wei,

Shaozu Yuan,

Jiaqi Chen,

Tianyu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Qinfu and Pan, Liyuan and Wei, Yiwei and Yuan, Shaozu and Chen, Jiaqi and Liu, Tianyu}, title = {EmoThinker: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1672-1682} }
ARC Is a Vision Problem!: Keya Hu,

Ali Cy,

Linlu Qiu,

Xiaoman Delores Ding,

Runqian Wang,

Yeyin Eva Zhu,

Jacob Andreas,

Kaiming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Keya and Cy, Ali and Qiu, Linlu and Ding, Xiaoman Delores and Wang, Runqian and Zhu, Yeyin Eva and Andreas, Jacob and He, Kaiming}, title = {ARC Is a Vision Problem!}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2537-2546} }
VIRST: Video-Instructed Reasoning Assistant for SpatioTemporal Segmentation: Jihwan Hong,

Jaeyoung Do; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR, author = {Hong, Jihwan and Do, Jaeyoung}, title = {VIRST: Video-Instructed Reasoning Assistant for SpatioTemporal Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3410-3420} }
Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness: Boya Shi,

Naiyang Guan,

Xiaodong Yi; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Boya and Guan, Naiyang and Yi, Xiaodong}, title = {Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8308-8318} }
Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection: Yuan Zhao,

Xiaoqin Zhang,

Huchuan Lu,

Lihe Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuan and Zhang, Xiaoqin and Lu, Huchuan and Zhang, Lihe}, title = {Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14178-14187} }
TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning: Tao Wu,

Li Yang,

Gen Zhan,

Yabin Zhang,

Yiting Liao,

Junlin Li,

Deliang Fu,

Li Zhang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Yang, Li and Zhan, Gen and Zhang, Yabin and Liao, Yiting and Li, Junlin and Fu, Deliang and Zhang, Li and Wang, Limin}, title = {TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2756-2767} }
DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching: Chang Zou,

Changlin Li,

Songtao Liu,

Zhao Zhong,

Kailin Huang,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR, author = {Zou, Chang and Li, Changlin and Liu, Songtao and Zhong, Zhao and Huang, Kailin and Zhang, Linfeng}, title = {DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4590-4601} }
KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System: Zhongyu Xia,

Wenhao Chen,

Yongtao Wang,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Zhongyu and Chen, Wenhao and Wang, Yongtao and Yang, Ming-Hsuan}, title = {KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3740-3749} }
EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion: Da Li,

Dominik Engel,

Deng Luo,

Ivan Viola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Da and Engel, Dominik and Luo, Deng and Viola, Ivan}, title = {EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7610-7619} }
OSMO: Open-vocabulary Self-eMOtion Tracking: Mohamed Abdelfattah,

Bugra Tekin,

Fadime Sener,

Necati Cihan Camgoz,

Eric Sauser,

Shugao Ma,

Alexandre Alahi,

Edoardo Remelli; [pdf]
[bibtex]
@InProceedings{Abdelfattah_2026_CVPR, author = {Abdelfattah, Mohamed and Tekin, Bugra and Sener, Fadime and Camgoz, Necati Cihan and Sauser, Eric and Ma, Shugao and Alahi, Alexandre and Remelli, Edoardo}, title = {OSMO: Open-vocabulary Self-eMOtion Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1737-1748} }
DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation: Yichen Peng,

Jyun-Ting Song,

Siyeol Jung,

Ruofan Liu,

Haiyang Liu,

Xuangeng Chu,

Ruicong Liu,

Erwin Wu,

Hideki Koike,

Kris Kitani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Yichen and Song, Jyun-Ting and Jung, Siyeol and Liu, Ruofan and Liu, Haiyang and Chu, Xuangeng and Liu, Ruicong and Wu, Erwin and Koike, Hideki and Kitani, Kris}, title = {DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10932-10942} }
CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis: Yansong Li,

Zhongxi Qiu,

Yun Tian,

Zheng Jinyu,

Shuo Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yansong and Qiu, Zhongxi and Tian, Yun and Jinyu, Zheng and Li, Shuo}, title = {CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7003-7013} }
CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models: Junyang Ji,

Qifan Liu,

Wenming Yang,

Zhihai He; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Junyang and Liu, Qifan and Yang, Wenming and He, Zhihai}, title = {CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4199-4209} }
Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis: Kang Yang,

Yuning Chen,

Wan Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Kang and Chen, Yuning and Du, Wan}, title = {Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12533-12543} }
SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation: Guiyu Zhang,

Yabo Chen,

Xunzhi Xiang,

Junchao Huang,

Zhongyu Wang,

Li Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guiyu and Chen, Yabo and Xiang, Xunzhi and Huang, Junchao and Wang, Zhongyu and Jiang, Li}, title = {SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11127-11137} }
GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions: Haifeng Zhong,

Wenshuo Han,

Zhouyu Wang,

Runyang Feng,

Fan Tang,

Tong-Yee Lee,

Zipei Fan,

Ruihai Wu,

Yuran Wang,

Hao Dong,

Hechang Chen,

Hyung Jin Chang,

Yixing Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Haifeng and Han, Wenshuo and Wang, Zhouyu and Feng, Runyang and Tang, Fan and Lee, Tong-Yee and Fan, Zipei and Wu, Ruihai and Wang, Yuran and Dong, Hao and Chen, Hechang and Chang, Hyung Jin and Gao, Yixing}, title = {GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6631-6641} }
Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation: Jae Yun Lee,

Hyeok Nam,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Jae Yun and Nam, Hyeok and Cho, Sung In}, title = {Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3617-3626} }
Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection: Xu Wang,

Zihan Lin,

Yixin Zhang,

Zilei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xu and Lin, Zihan and Zhang, Yixin and Wang, Zilei}, title = {Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6249-6260} }
Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference: Zhiceng Shi,

Changmiao Wang,

Jun Wan,

Wenwen Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Zhiceng and Wang, Changmiao and Wan, Jun and Min, Wenwen}, title = {Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5710-5719} }
SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance: Minghan Yang,

Lan Yang,

Ke Li,

Honggang Zhang,

Kaiyue Pang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Minghan and Yang, Lan and Li, Ke and Zhang, Honggang and Pang, Kaiyue and Song, Yi-Zhe}, title = {SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13658-13669} }
3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion: Minchong Chen,

Xiaoyun Yuan,

Junzhe Wan,

Jianing Zhang,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Minchong and Yuan, Xiaoyun and Wan, Junzhe and Zhang, Jianing and Zhang, Jun}, title = {3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5659-5669} }
Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression: Yuwu Lu,

Chunzhi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Yuwu and Liu, Chunzhi}, title = {Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {801-811} }
FoundIR-v2: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model: Xiang Chen,

Jinshan Pan,

Jiangxin Dong,

Jian Yang,

Jinhui Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Xiang and Pan, Jinshan and Dong, Jiangxin and Yang, Jian and Tang, Jinhui}, title = {FoundIR-v2: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8471-8480} }
Learning Coordinate-based Convolutional Kernels for Continuous SE(3) Equivariant and Efficient Point Cloud Analysis: Jaein Kim,

Hee Bin Yoo,

Dong-Sig Han,

Byoung-Tak Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Jaein and Yoo, Hee Bin and Han, Dong-Sig and Zhang, Byoung-Tak}, title = {Learning Coordinate-based Convolutional Kernels for Continuous SE(3) Equivariant and Efficient Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9986-9995} }
PRISM: Learning a Shared Primitive Space for Transferable Skeleton Action Representation: Di Yang,

Yaohui Wang,

Shuai Shao,

François Brémond,

Jiangtao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Di and Wang, Yaohui and Shao, Shuai and Br\'emond, Fran\c{c}ois and Wang, Jiangtao}, title = {PRISM: Learning a Shared Primitive Space for Transferable Skeleton Action Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6085-6094} }
Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning: Seung Hyup Baek,

Jimin Lee,

Hyeongkeun Lee,

Jae Won Cho; [pdf] [arXiv]
[bibtex]
@InProceedings{Baek_2026_CVPR, author = {Baek, Seung Hyup and Lee, Jimin and Lee, Hyeongkeun and Cho, Jae Won}, title = {Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3432-3442} }
HumanVBench: Probing Human-Centric Video Understanding in MLLMs with Automatically Synthesized Benchmarks: Ting Zhou,

Daoyuan Chen,

Qirui Jiao,

Bolin Ding,

Yaliang Li,

Ying Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Ting and Chen, Daoyuan and Jiao, Qirui and Ding, Bolin and Li, Yaliang and Shen, Ying}, title = {HumanVBench: Probing Human-Centric Video Understanding in MLLMs with Automatically Synthesized Benchmarks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4494-4504} }
SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking: Qiuyang Zhang,

Jiujun Cheng,

Qichao Mao,

Cong Liu,

Yu Fang,

Yuhong Li,

Mengying Ge,

Shangce Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qiuyang and Cheng, Jiujun and Mao, Qichao and Liu, Cong and Fang, Yu and Li, Yuhong and Ge, Mengying and Gao, Shangce}, title = {SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6802-6811} }
Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding: Tianchen Guo,

Chen Liu,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Tianchen and Liu, Chen and Yu, Xin}, title = {Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7154-7164} }
Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition: Minxue Tang,

Yangyang Yu,

Aolin Ding,

Maziyar Baran Pouyan,

Taha Belkhouja,

Yujia Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Minxue and Yu, Yangyang and Ding, Aolin and Pouyan, Maziyar Baran and Belkhouja, Taha and Bao, Yujia}, title = {Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7978-7989} }
Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow: Yu Gao,

Lutong Su,

Ruixiang Huang,

Tianji Jiang,

Jiadong Tang,

Yufeng Yue,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Yu and Su, Lutong and Huang, Ruixiang and Jiang, Tianji and Tang, Jiadong and Yue, Yufeng and Yang, Yi}, title = {Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7310-7319} }
Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models: Sijie Li,

Biao Qian,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Sijie and Qian, Biao and Han, Jungong}, title = {Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10472-10481} }
FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning: Zhiqiang Kou,

Junxiang Wu,

Wenke Huang,

Wenwen He,

Ming-Kun Xie,

Changwei Wang,

Yuheng Jia,

Di Jiang,

Yang Liu,

Xin Geng,

Qiang Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Kou_2026_CVPR, author = {Kou, Zhiqiang and Wu, Junxiang and Huang, Wenke and He, Wenwen and Xie, Ming-Kun and Wang, Changwei and Jia, Yuheng and Jiang, Di and Liu, Yang and Geng, Xin and Yang, Qiang}, title = {FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10324-10334} }
MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy: Albert Dominguez Mantes,

Gioele La Manno,

Martin Weigert; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mantes_2026_CVPR, author = {Dominguez Mantes, Albert and La Manno, Gioele and Weigert, Martin}, title = {MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13648-13657} }
SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals: Soyeon Yoon,

Chang Wook Seo,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR, author = {Yoon, Soyeon and Seo, Chang Wook and Shim, Hyunjung}, title = {SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7142-7153} }
OMG-Bench: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition: Haochen Chang,

Pengfei Ren,

Buyuan Zhang,

Da Li,

Tianhao Han,

Haoyang Zhang,

Liang Xie,

Hongbo Chen,

Erwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Haochen and Ren, Pengfei and Zhang, Buyuan and Li, Da and Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {{OMG-Bench}: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7068--7078}
}
Reinforcing Structured Chain-of-Thought for Video Understanding: Peiyao Wang,

Haotian Xu,

Noranart Vesdapunt,

Rui Hou,

Jingyi Zhang,

Haibin Ling,

Oleksandr Obiednikov,

Ning Zhou,

Kah Kuen Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Peiyao and Xu, Haotian and Vesdapunt, Noranart and Hou, Rui and Zhang, Jingyi and Ling, Haibin and Obiednikov, Oleksandr and Zhou, Ning and Fu, Kah Kuen},
  title     = {Reinforcing Structured Chain-of-Thought for Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9794--9803}
}
Dynamic-eDiTor: Training-Free Text-Driven 4D Scene Editing with Multimodal Diffusion Transformer: Dong In Lee,

Hyungjun Doh,

Seunggeun Chi,

Runlin Duan,

Sangpil Kim,

Karthik Ramani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Dong In and Doh, Hyungjun and Chi, Seunggeun and Duan, Runlin and Kim, Sangpil and Ramani, Karthik},
  title     = {{Dynamic-eDiTor}: Training-Free Text-Driven {4D} Scene Editing with Multimodal Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1187--1197}
}
Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation: Boyu Han,

Qianqian Xu,

Shilong Bao,

Zhiyong Yang,

Ruochen Cui,

Xilin Zhao,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Boyu and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Cui, Ruochen and Zhao, Xilin and Huang, Qingming},
  title     = {Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2369--2380}
}
Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding: Jin-Seop Lee,

SungJoon Lee,

SeongJun Jung,

Boyang Li,

Jee-Hyong Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jin-Seop and Lee, SungJoon and Jung, SeongJun and Li, Boyang and Lee, Jee-Hyong},
  title     = {Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10397--10407}
}
Rethinking Concept Bottleneck Models: From Pitfalls to Solutions: Merve Tapli,

Quentin Bouniot,

Wolfgang Stammer,

Zeynep Akata,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tapli_2026_CVPR,
  author    = {Tapli, Merve and Bouniot, Quentin and Stammer, Wolfgang and Akata, Zeynep and Akbas, Emre},
  title     = {Rethinking Concept Bottleneck Models: From Pitfalls to Solutions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9901--9910}
}
WildRayZer: Self-supervised Large View Synthesis in Dynamic Environments: Xuweiyi Chen,

Wentao Zhou,

Zezhou Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xuweiyi and Zhou, Wentao and Cheng, Zezhou},
  title     = {{WildRayZer}: Self-supervised Large View Synthesis in Dynamic Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1252--1264}
}
Attribution as Retrieval: Model-Agnostic AI-Generated Image Attribution: Hongsong Wang,

Renxi Cheng,

Chaolei Han,

Jie Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hongsong and Cheng, Renxi and Han, Chaolei and Gui, Jie},
  title     = {Attribution as Retrieval: Model-Agnostic {AI}-Generated Image Attribution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14062--14072}
}
Spectrally Distilled Representations Aligned with Instruction-Augmented LLMs for Satellite Imagery: Minh Kha Do,

Wei Xiang,

Kang Han,

Di Wu,

Khoa Phan,

Yi-Ping Phoebe Chen,

Gaowen Liu,

Ramana Rao Kompella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Do_2026_CVPR,
  author    = {Do, Minh Kha and Xiang, Wei and Han, Kang and Wu, Di and Phan, Khoa and Chen, Yi-Ping Phoebe and Liu, Gaowen and Kompella, Ramana Rao},
  title     = {Spectrally Distilled Representations Aligned with Instruction-Augmented {LLMs} for Satellite Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6453--6463}
}
Towards Knowledge-augmented Bayesian Deep Learning For Computer Vision: Wang Ma,

Hanjing Wang,

Yufei Zhang,

Darsha Udayanga,

Qiang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Wang and Wang, Hanjing and Zhang, Yufei and Udayanga, Darsha and Ji, Qiang},
  title     = {Towards Knowledge-augmented {Bayesian} Deep Learning For Computer Vision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6136--6146}
}
Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack: M. Kerem Aydin,

Yi-Chun Hung,

Jaclyn Pytlarz,

Qi Guo,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aydin_2026_CVPR,
  author    = {Aydin, M. Kerem and Hung, Yi-Chun and Pytlarz, Jaclyn and Guo, Qi and Alexander, Emma},
  title     = {Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {220--230}
}
IF-Prune: Information-Flow Guided Token Pruning for Efficient Vision-Language Models: Guohao Sun,

Yufei Wang,

Sizhuo Ma,

Yuege Xie,

Yuting Cheng,

Zhiqiang Tao,

Jian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Guohao and Wang, Yufei and Ma, Sizhuo and Xie, Yuege and Cheng, Yuting and Tao, Zhiqiang and Wang, Jian},
  title     = {{IF-Prune}: Information-Flow Guided Token Pruning for Efficient Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3522--3531}
}
Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation: Junming Zhang,

Shuyu Yin,

Peilin Liu,

Rendong Ying,

Fei Wen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junming and Yin, Shuyu and Liu, Peilin and Ying, Rendong and Wen, Fei},
  title     = {Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {836--846}
}
VL-RouterBench: A Benchmark for Vision-Language Model Routing: Zhehao Huang,

Baijiong Lin,

Jingyuan Zhang,

Jingying Wang,

Yuhang Liu,

Ning Lu,

Tao Li,

Xiaolin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhehao and Lin, Baijiong and Zhang, Jingyuan and Wang, Jingying and Liu, Yuhang and Lu, Ning and Li, Tao and Huang, Xiaolin},
  title     = {{VL-RouterBench}: A Benchmark for Vision-Language Model Routing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9512--9523}
}
RevINN: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation: Jielun Huang,

Chi-Man Pun,

Guoheng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jielun and Pun, Chi-Man and Huang, Guoheng},
  title     = {{RevINN}: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6601--6610}
}
Neural Gabor Splatting: Enhanced Gaussian Splatting with Neural Gabor for High-frequency Surface Reconstruction: Haato Watanabe,

Nobuyuki Umetani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Watanabe_2026_CVPR,
  author    = {Watanabe, Haato and Umetani, Nobuyuki},
  title     = {Neural {Gabor} Splatting: Enhanced {Gaussian} Splatting with Neural {Gabor} for High-frequency Surface Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4932--4941}
}
Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding: Fatih Ilhan,

Gaowen Liu,

Ramana Rao Kompella,

Selim Furkan Tekin,

Tiansheng Huang,

Zachary Yahn,

Yichang Xu,

Ling Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ilhan_2026_CVPR,
  author    = {Ilhan, Fatih and Liu, Gaowen and Kompella, Ramana Rao and Tekin, Selim Furkan and Huang, Tiansheng and Yahn, Zachary and Xu, Yichang and Liu, Ling},
  title     = {Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10482--10491}
}
RoadGIE: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction: Chenxu Peng,

Chenxu Wang,

Yimian Dai,

Yongxiang Liu,

Ming-Ming Cheng,

Xiang Li; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Chenxu and Wang, Chenxu and Dai, Yimian and Liu, Yongxiang and Cheng, Ming-Ming and Li, Xiang},
  title     = {{RoadGIE}: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13285--13295}
}
UniGeoRS: A Unified Benchmark for Tri-view Geo-Localization: Xiao Liang,

Huaizhi Tang,

Feiyang Zhang,

Shiji Yuan,

Chun Hu,

Dezhi Zheng,

Kang Ma; [pdf]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xiao and Tang, Huaizhi and Zhang, Feiyang and Yuan, Shiji and Hu, Chun and Zheng, Dezhi and Ma, Kang},
  title     = {{UniGeoRS}: A Unified Benchmark for Tri-view Geo-Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5399--5408}
}
AHS: Adaptive Head Synthesis via Synthetic Data Augmentations: Taewoong Kang,

Hyojin Jang,

Sohyun Jeong,

Seunggi Moon,

Gihwi Kim,

Hoon Jin Jung,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Taewoong and Jang, Hyojin and Jeong, Sohyun and Moon, Seunggi and Kim, Gihwi and Jung, Hoon Jin and Choo, Jaegul},
  title     = {{AHS}: Adaptive Head Synthesis via Synthetic Data Augmentations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2125--2135}
}
Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities: Dimitrios Katsikas,

Nikolaos Passalis,

Anastasios Tefas; [pdf] [supp]
[bibtex]
@InProceedings{Katsikas_2026_CVPR,
  author    = {Katsikas, Dimitrios and Passalis, Nikolaos and Tefas, Anastasios},
  title     = {Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10512--10521}
}
Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning: Yolo Y. Tang,

Chao Huang,

Susan Liang,

Jing Bi,

Yicheng Wang,

Daiki Shimada,

Chenliang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yolo Y. and Huang, Chao and Liang, Susan and Bi, Jing and Wang, Yicheng and Shimada, Daiki and Xu, Chenliang},
  title     = {Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2799--2810}
}
Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning: Kaichen He,

Zihao Wang,

Muyao Li,

Anji Liu,

Yitao Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Kaichen and Wang, Zihao and Li, Muyao and Liu, Anji and Liang, Yitao},
  title     = {Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {724--734}
}
Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks: Morui Zhu,

Yongqi Zhu,

Song Fu,

Qing Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Morui and Zhu, Yongqi and Fu, Song and Yang, Qing},
  title     = {Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10668--10677}
}
Beyond Text: Visual Description Assembly by Probabilistic Model for CLIP-based Weakly Supervised Semantic Segmentation: Xianglin Qiu,

Jian Wang,

Xiaolei Wang,

Zhen Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xianglin and Wang, Jian and Wang, Xiaolei and Zhang, Zhen and Xiao, Jimin},
  title     = {Beyond Text: Visual Description Assembly by Probabilistic Model for {CLIP}-based Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6346--6356}
}
R4-CGQA: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment: Zhuangzi Li,

Jian Jin,

Shilv Cai,

Weisi Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhuangzi and Jin, Jian and Cai, Shilv and Lin, Weisi},
  title     = {{R4-CGQA}: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9468--9477}
}
ARGUS: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior: Weikai Lu,

Ziqian Zeng,

Kehua Zhang,

Haoran Li,

Huiping Zhuang,

Ruidong Wang,

Cen Chen,

Hao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Weikai and Zeng, Ziqian and Zhang, Kehua and Li, Haoran and Zhuang, Huiping and Wang, Ruidong and Chen, Cen and Peng, Hao},
  title     = {{ARGUS}: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31--40}
}
SLVMEval: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation: Ryosuke Matsuda,

Keito Kudo,

Haruto Yoshida,

Nobuyuki Shimizu,

Jun Suzuki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuda_2026_CVPR,
  author    = {Matsuda, Ryosuke and Kudo, Keito and Yoshida, Haruto and Shimizu, Nobuyuki and Suzuki, Jun},
  title     = {{SLVMEval}: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7784--7794}
}
Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression: Zhimeng Huang,

Rongao Yuan,

Junlong Gao,

Qi Mao,

Siwei Ma,

Wen Gao,

Chuanmin Jia; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhimeng and Yuan, Rongao and Gao, Junlong and Mao, Qi and Ma, Siwei and Gao, Wen and Jia, Chuanmin},
  title     = {Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5326--5336}
}
Lynx: Towards High-Fidelity Personalized Video Generation: Shen Sang,

Tiancheng Zhi,

Tianpei Gu,

Jing Liu,

Linjie Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sang_2026_CVPR,
  author    = {Sang, Shen and Zhi, Tiancheng and Gu, Tianpei and Liu, Jing and Luo, Linjie},
  title     = {Lynx: Towards High-Fidelity Personalized Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9192--9202}
}
Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm: Yufeng Cheng,

Wenxu Wu,

Shaojin Wu,

Mengqi Huang,

Fei Ding,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yufeng and Wu, Wenxu and Wu, Shaojin and Huang, Mengqi and Ding, Fei and He, Qian},
  title     = {Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1906--1916}
}
Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models: Yiming Wu,

Chenghao Chen,

Changkun Wu,

Chong Fu,

Biru Zhu,

Zhenyu Wen,

Zhen Hong; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yiming and Chen, Chenghao and Wu, Changkun and Fu, Chong and Zhu, Biru and Wen, Zhenyu and Hong, Zhen},
  title     = {Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1503--1512}
}
Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations: Sudong Cai,

Shuai Yuan,

Bingzhi Chen,

Rui Mao,

Bing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Sudong and Yuan, Shuai and Chen, Bingzhi and Mao, Rui and Wang, Bing},
  title     = {Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11621--11631}
}
Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer: Yuze Li,

Dong Gong,

Xiao Cao,

Junchao Yuan,

Dongsheng Li,

Lei Zhou,

Yun Sing Koh,

Cheng Yan,

Xinyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuze and Gong, Dong and Cao, Xiao and Yuan, Junchao and Li, Dongsheng and Zhou, Lei and Koh, Yun Sing and Yan, Cheng and Zhang, Xinyu},
  title     = {Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11207--11217}
}
Sky2Ground: A Benchmark for Site Modeling under Varying Altitude: Zengyan Wang,

Sirshapan Mitra,

Rajat Modi,

Hui Lim,

Yogesh Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zengyan and Mitra, Sirshapan and Modi, Rajat and Lim, Hui and Rawat, Yogesh},
  title     = {{Sky2Ground}: A Benchmark for Site Modeling under Varying Altitude},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12227--12236}
}
FORCE: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance CorrEction: Runqi Lin,

Alasdair Paren,

Suqin Yuan,

Muyang Li,

Philip Torr,

Adel Bibi,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Runqi and Paren, Alasdair and Yuan, Suqin and Li, Muyang and Torr, Philip and Bibi, Adel and Liu, Tongliang},
  title     = {{FORCE}: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance {CorrEction}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8610--8620}
}
E-3DPSM: A State Machine for Event-based Egocentric 3D Human Pose Estimation: Mayur Deshmukh,

Hiroyasu Akada,

Helge Rhodin,

Christian Theobalt,

Vladislav Golyanik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deshmukh_2026_CVPR,
  author    = {Deshmukh, Mayur and Akada, Hiroyasu and Rhodin, Helge and Theobalt, Christian and Golyanik, Vladislav},
  title     = {{E-3DPSM}: A State Machine for Event-based Egocentric {3D} Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14017--14026}
}
Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging: Yunpeng Fang,

Yimu Sun,

Jingxing Guo,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Yunpeng and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing},
  title     = {Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8535--8544}
}
The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy: Zhuo Chen,

Fanyue Wei,

Runze Xu,

Jingjing Li,

Lixin Duan,

Angela Yao,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuo and Wei, Fanyue and Xu, Runze and Li, Jingjing and Duan, Lixin and Yao, Angela and Li, Wen},
  title     = {The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8237--8246}
}
OmniZip: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data: Yan Zhao,

Zhengxue Cheng,

Junxuan Zhang,

Dajiang Zhou,

Qunshan Gu,

Qi Wang,

Li Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yan and Cheng, Zhengxue and Zhang, Junxuan and Zhou, Dajiang and Gu, Qunshan and Wang, Qi and Song, Li},
  title     = {{OmniZip}: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5337--5347}
}
ProjFlow: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control: Akihisa Watanabe,

Qing Yu,

Edgar Simo-Serra,

Kent Fujiwara; [pdf] [supp]
[bibtex]
@InProceedings{Watanabe_2026_CVPR,
  author    = {Watanabe, Akihisa and Yu, Qing and Simo-Serra, Edgar and Fujiwara, Kent},
  title     = {{ProjFlow}: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2305--2315}
}
Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization: Tahira Kazimi,

Connor Dunlop,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kazimi_2026_CVPR,
  author    = {Kazimi, Tahira and Dunlop, Connor and Yanardag, Pinar},
  title     = {Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12839--12848}
}
Proxy-GS: Unified Occlusion Priors for Training and Inference in Structured 3D Gaussian Splatting: Yuanyuan Gao,

Yuning Gong,

Yifei Liu,

Jingfeng Li,

Dan Xu,

Yanci Zhang,

Dingwen Zhang,

Xiao Sun,

Zhihang Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuanyuan and Gong, Yuning and Liu, Yifei and Li, Jingfeng and Xu, Dan and Zhang, Yanci and Zhang, Dingwen and Sun, Xiao and Zhong, Zhihang},
  title     = {{Proxy-GS}: Unified Occlusion Priors for Training and Inference in Structured {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7330--7339}
}
AnyPcc: Compressing Any Point Cloud with a Single Universal Model: Kangli Wang,

Qianxi Yi,

Yuqi Ye,

Shihao Li,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kangli and Yi, Qianxi and Ye, Yuqi and Li, Shihao and Gao, Wei},
  title     = {{AnyPcc}: Compressing Any Point Cloud with a Single Universal Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2972--2982}
}
InternData-A1: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy: Yang Tian,

Yuyin Yang,

Yiman Xie,

Zetao Cai,

Xu Shi,

Ning Gao,

Hangxu Liu,

Xuekun Jiang,

Zherui Qiu,

Feng Yuan,

Yaping Li,

Ping Wang,

Junhao Cai,

Jia Zeng,

Hao Dong,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Yang and Yang, Yuyin and Xie, Yiman and Cai, Zetao and Shi, Xu and Gao, Ning and Liu, Hangxu and Jiang, Xuekun and Qiu, Zherui and Yuan, Feng and Li, Yaping and Wang, Ping and Cai, Junhao and Zeng, Jia and Dong, Hao and Pang, Jiangmiao},
  title     = {{InternData-A1}: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {976--985}
}
ART: Articulated Reconstruction Transformer: Zizhang Li,

Cheng Zhang,

Zhengqin Li,

Henry Howard-Jenkins,

Zhaoyang Lv,

Chen Geng,

Jiajun Wu,

Richard Newcombe,

Jakob Engel,

Zhao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zizhang and Zhang, Cheng and Li, Zhengqin and Howard-Jenkins, Henry and Lv, Zhaoyang and Geng, Chen and Wu, Jiajun and Newcombe, Richard and Engel, Jakob and Dong, Zhao},
  title     = {{ART}: Articulated Reconstruction Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7468--7479}
}
SAIDO: Generalizable Detection of AI-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning: Yongkang Hu,

Yu Cheng,

Yushuo Zhang,

Yuan Xie,

Zhaoxia Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yongkang and Cheng, Yu and Zhang, Yushuo and Xie, Yuan and Yin, Zhaoxia},
  title     = {{SAIDO}: Generalizable Detection of {AI}-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3876--3886}
}
Occluded Human Body Capture with Frequency Domain Denoising Prior: Buzhen Huang,

Chongyang Xu,

Wentao Tang,

Yuan Shu,

Jingyi Ju,

Binghui Zuo,

Yangang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Buzhen and Xu, Chongyang and Tang, Wentao and Shu, Yuan and Ju, Jingyi and Zuo, Binghui and Wang, Yangang},
  title     = {Occluded Human Body Capture with Frequency Domain Denoising Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13930--13939}
}
MedCLIPSeg: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation: Taha Koleilat,

Hojat Asgariandehkordi,

Omid Nejatimanzari,

Berardino Barile,

Yiming Xiao,

Hassan Rivaz; [pdf] [supp]
[bibtex]
@InProceedings{Koleilat_2026_CVPR,
  author    = {Koleilat, Taha and Asgariandehkordi, Hojat and Nejatimanzari, Omid and Barile, Berardino and Xiao, Yiming and Rivaz, Hassan},
  title     = {{MedCLIPSeg}: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1406--1417}
}
Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer: Yajing Liu,

Yumeng Zhang,

Yue Si,

Baojie Fan,

Jiandong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajing and Zhang, Yumeng and Si, Yue and Fan, Baojie and Tian, Jiandong},
  title     = {Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {812--822}
}
Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments: Shuang Song,

Debao Huang,

Deyan Deng,

Haolin Xiong,

Yang Tang,

Yajie Zhao,

Rongjun Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Shuang and Huang, Debao and Deng, Deyan and Xiong, Haolin and Tang, Yang and Zhao, Yajie and Qin, Rongjun},
  title     = {Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6474--6483}
}
UnityVideo: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation: Jiehui Huang,

Yuechen Zhang,

Xu He,

Yuan Gao,

Zhi Cen,

Bin Xia,

Yan Zhou,

Xin Tao,

Pengfei Wan,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiehui and Zhang, Yuechen and He, Xu and Gao, Yuan and Cen, Zhi and Xia, Bin and Zhou, Yan and Tao, Xin and Wan, Pengfei and Jia, Jiaya},
  title     = {{UnityVideo}: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4471--4481}
}
Drift-Resilient Temporal Priors for Visual Tracking: Yuqing Huang,

Liting Lin,

Weijun Zhuang,

Zhenyu He,

Xin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuqing and Lin, Liting and Zhuang, Weijun and He, Zhenyu and Li, Xin},
  title     = {Drift-Resilient Temporal Priors for Visual Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6847--6856}
}
MuKV: Multi-Grained KV Cache Compression for Long Streaming Video Question-Answering: Junbin Xiao,

Jiajun Chen,

Tianxiang Sun,

Xun Yang,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
    author    = {Xiao, Junbin and Chen, Jiajun and Sun, Tianxiang and Yang, Xun and Yao, Angela},
    title     = {{MuKV}: Multi-Grained {KV} Cache Compression for Long Streaming Video Question-Answering},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {11381--11391}
}
Spatia: Video Generation with Updatable Spatial Memory: Jinjing Zhao,

Fangyun Wei,

Zhening Liu,

Hongyang Zhang,

Chang Xu,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Jinjing and Wei, Fangyun and Liu, Zhening and Zhang, Hongyang and Xu, Chang and Lu, Yan},
    title     = {{Spatia}: Video Generation with Updatable Spatial Memory},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {4245--4257}
}
ORV: 4D Occupancy-centric Robot Video Generation: Xiuyu Yang,

Bohan Li,

Shaocong Xu,

Nan Wang,

Chongjie Ye,

Zhaoxi Chen,

Minghan Qin,

Yikang Ding,

Zheng Zhu,

Xin Jin,

Hang Zhao,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Xiuyu and Li, Bohan and Xu, Shaocong and Wang, Nan and Ye, Chongjie and Chen, Zhaoxi and Qin, Minghan and Ding, Yikang and Zhu, Zheng and Jin, Xin and Zhao, Hang and Zhao, Hao},
    title     = {{ORV}: {4D} Occupancy-centric Robot Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1053--1066}
}
RiskProp: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation: Yiyang Zou,

Tianhao Zhao,

Peilun Xiao,

Hongyu Jin,

Longyu Qi,

Yuxuan Li,

Liyin Liang,

Yifeng Qian,

Chunbo Lai,

Yutian Lin,

Zhihui Li,

Yu Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
    author    = {Zou, Yiyang and Zhao, Tianhao and Xiao, Peilun and Jin, Hongyu and Qi, Longyu and Li, Yuxuan and Liang, Liyin and Qian, Yifeng and Lai, Chunbo and Lin, Yutian and Li, Zhihui and Wu, Yu},
    title     = {{RiskProp}: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {2768--2777}
}
OVSegDT: Segmenting Transformer for Open-Vocabulary Object Goal Navigation: Tatiana Zemskova,

Aleksei Staroverov,

Dmitry Yudin,

Aleksandr Panov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zemskova_2026_CVPR,
    author    = {Zemskova, Tatiana and Staroverov, Aleksei and Yudin, Dmitry and Panov, Aleksandr},
    title     = {{OVSegDT}: Segmenting Transformer for Open-Vocabulary Object Goal Navigation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8120--8129}
}
Learning to Reason in 4D: Dynamic Spatial Understanding for Vision Language Models: Shengchao Zhou,

Yuxin Chen,

Yuying Ge,

Wei Huang,

Jiehong Lin,

Ying Shan,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Shengchao and Chen, Yuxin and Ge, Yuying and Huang, Wei and Lin, Jiehong and Shan, Ying and Qi, Xiaojuan},
    title     = {Learning to Reason in {4D}: Dynamic Spatial Understanding for Vision Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9637--9646}
}
The More, the Merrier: Contrastive Fusion for Higher-Order Multimodal Alignment: Stefanos Koutoupis,

Michaela Areti Zervou,

Konstantinos Kontras,

Maarten De Vos,

Panagiotis Tsakalides,

Grigorios Tsagkatakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koutoupis_2026_CVPR,
    author    = {Koutoupis, Stefanos and Zervou, Michaela Areti and Kontras, Konstantinos and De Vos, Maarten and Tsakalides, Panagiotis and Tsagkatakis, Grigorios},
    title     = {The More, the Merrier: Contrastive Fusion for Higher-Order Multimodal Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8825--8835}
}
Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding: Xuan Lu,

Kangle Li,

Haohang Huang,

Rui Meng,

Wenjun Zeng,

Xiaoyu Shen; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Xuan and Li, Kangle and Huang, Haohang and Meng, Rui and Zeng, Wenjun and Shen, Xiaoyu},
    title     = {Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9699--9709}
}
SCE-SLAM: Scale-Consistent Monocular SLAM via Scene Coordinate Embeddings: Yuchen Wu,

Jiahe Li,

Xiaohan Yu,

Lina Yu,

Jin Zheng,

Xiao Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Yuchen and Li, Jiahe and Yu, Xiaohan and Yu, Lina and Zheng, Jin and Bai, Xiao},
    title     = {{SCE-SLAM}: Scale-Consistent Monocular {SLAM} via Scene Coordinate Embeddings},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {7480--7490}
}
Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models: Zehua Zang,

Xi Wang,

Fuchun Sun,

Xiao Xu,

Lixiang Liu,

Jiahuan Zhou,

Jiangmeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Zang_2026_CVPR,
    author    = {Zang, Zehua and Wang, Xi and Sun, Fuchun and Xu, Xiao and Liu, Lixiang and Zhou, Jiahuan and Li, Jiangmeng},
    title     = {Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8110--8119}
}
Towards High-resolution and Disentangled Reference-based Sketch Colorization: Dingkun Yan,

Xinrui Wang,

Ru Wang,

Zhuoru Li,

Jinze Yu,

Yusuke Iwasawa,

Yutaka Matsuo,

Jiaxian Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Dingkun and Wang, Xinrui and Wang, Ru and Li, Zhuoru and Yu, Jinze and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian},
    title     = {Towards High-resolution and Disentangled Reference-based Sketch Colorization},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {11472--11481}
}
UniDex: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos: Gu Zhang,

Qicheng Xu,

Haozhe Zhang,

Jianhan Ma,

Long He,

Yiming Bao,

Zeyu Ping,

Zhecheng Yuan,

Chenhao Lu,

Chengbo Yuan,

Tianhai Liang,

Xiaoyu Tian,

Maanping Shao,

Feihong Zhang,

Mingyu Ding,

Yang Gao,

Hao Zhao,

Hang Zhao,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Gu and Xu, Qicheng and Zhang, Haozhe and Ma, Jianhan and He, Long and Bao, Yiming and Ping, Zeyu and Yuan, Zhecheng and Lu, Chenhao and Yuan, Chengbo and Liang, Tianhai and Tian, Xiaoyu and Shao, Maanping and Zhang, Feihong and Ding, Mingyu and Gao, Yang and Zhao, Hao and Zhao, Hang and Xu, Huazhe},
    title     = {{UniDex}: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1841--1852}
}
LaMoGen: Language to Motion Generation Through LLM-Guided Symbolic Inference: Junkun Jiang,

Ho Yin Au,

Jingyu Xiang,

Jie Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Junkun and Au, Ho Yin and Xiang, Jingyu and Chen, Jie},
    title     = {{LaMoGen}: Language to Motion Generation Through {LLM}-Guided Symbolic Inference},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9364--9373}
}
PRUE: A Practical Recipe for Field Boundary Segmentation at Scale: Gedeon Muhawenayo,

Caleb Robinson,

Subash Khanal,

Zhanpei Fang,

Isaac Corley,

Alexander Wollam,

Tianyi Gao,

Leonard Strnad,

Ryan Avery,

Lyndon Estes,

Ana Tárano,

Nathan Jacobs,

Hannah Kerner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Muhawenayo_2026_CVPR,
    author    = {Muhawenayo, Gedeon and Robinson, Caleb and Khanal, Subash and Fang, Zhanpei and Corley, Isaac and Wollam, Alexander and Gao, Tianyi and Strnad, Leonard and Avery, Ryan and Estes, Lyndon and T{\'a}rano, Ana and Jacobs, Nathan and Kerner, Hannah},
    title     = {{PRUE}: A Practical Recipe for Field Boundary Segmentation at Scale},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {6484--6495}
}
Recovering Physically Plausible Human-Object Interactions from Monocular Videos: Dingbang Huang,

Etienne Vouga,

Qixing Huang,

Georgios Pavlakos; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Dingbang and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios},
    title     = {Recovering Physically Plausible Human-Object Interactions from Monocular Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {7079--7088}
}
SPARK: Sim-ready Part-level Articulated Reconstruction with VLM Knowledge: Yumeng He,

Ying Jiang,

Jiayin Lu,

Yin Yang,

Chenfanfu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Yumeng and Jiang, Ying and Lu, Jiayin and Yang, Yin and Jiang, Chenfanfu},
    title     = {{SPARK}: Sim-ready Part-level Articulated Reconstruction with {VLM} Knowledge},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {7233--7243}
}
LaSM: Layer-wise Scaling Mechanism for Defending Pop-up Attack on GUI Agents: Zihe Yan,

Zhuosheng Zhang,

Jiaping Gui,

Gongshen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Zihe and Zhang, Zhuosheng and Gui, Jiaping and Liu, Gongshen},
    title     = {{LaSM}: Layer-wise Scaling Mechanism for Defending Pop-up Attack on {GUI} Agents},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {6528--6537}
}
AGiLe: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning: Zixuan Chen,

Xiangrong Feng,

Jieqi Shi,

Lin Shao,

Jing Huo,

Yang Gao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Zixuan and Feng, Xiangrong and Shi, Jieqi and Shao, Lin and Huo, Jing and Gao, Yang},
    title     = {{AGiLe}: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {6760--6769}
}
MaxMark: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding: Xuanhang Chang,

Zhonghao Yang,

Cheng Zhuo,

Yu Li; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Xuanhang and Yang, Zhonghao and Zhuo, Cheng and Li, Yu},
    title     = {{MaxMark}: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9394--9403}
}
DrivePTS: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation: Zhechao Wang,

Yiming Zeng,

Lufan Ma,

Zeqing Fu,

Chen Bai,

Dongshuo Yin,

Ziyao Lin,

Cheng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zhechao and Zeng, Yiming and Ma, Lufan and Fu, Zeqing and Bai, Chen and Yin, Dongshuo and Lin, Ziyao and Lu, Cheng},
    title     = {{DrivePTS}: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3699--3708}
}
Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos: Jinwen Wang,

Youfang Lin,

Xiaobo Hu,

Shuo Wang,

Kai Lv; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Jinwen and Lin, Youfang and Hu, Xiaobo and Wang, Shuo and Lv, Kai},
    title     = {Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9859--9868}
}
GeoTikzBridge: Advancing Multimodal Code Generation for Geometric Perception and Reasoning: Jiayin Sun,

Caixia Sun,

Boyu Yang,

Hailin Li,

Xiao Chen,

Yi Zhang,

Errui Ding,

Liang Li,

Chao Deng,

Junlan Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Jiayin and Sun, Caixia and Yang, Boyu and Li, Hailin and Chen, Xiao and Zhang, Yi and Ding, Errui and Li, Liang and Deng, Chao and Feng, Junlan},
    title     = {{GeoTikzBridge}: Advancing Multimodal Code Generation for Geometric Perception and Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9593--9603}
}
Omni2Sound: Towards Unified Video-Text-to-Audio Generation: Yusheng Dai,

Zehua Chen,

Yuxuan Jiang,

Qiuhong Ke,

Jianfei Cai,

Jun Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
    author    = {Dai, Yusheng and Chen, Zehua and Jiang, Yuxuan and Ke, Qiuhong and Cai, Jianfei and Zhu, Jun},
    title     = {{Omni2Sound}: Towards Unified Video-Text-to-Audio Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1661--1671}
}
LogCD: Local-to-global Consistency Distillation for Few-step Image Generation: Qingsong Xie,

Zhenyi Liao,

Chen Chen,

Zhijie Deng,

Haonan Lu; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Qingsong and Liao, Zhenyi and Chen, Chen and Deng, Zhijie and Lu, Haonan},
    title     = {{LogCD}: Local-to-global Consistency Distillation for Few-step Image Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8954--8964}
}
EW-DETR: Evolving World Object Detection via Incremental Low-Rank DEtection TRansformer: Munish Monga,

Vishal Chudasama,

Pankaj Wasnik,

C.V. Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Monga_2026_CVPR,
    author    = {Monga, Munish and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.},
    title     = {{EW-DETR}: Evolving World Object Detection via Incremental Low-Rank {DEtection} {TRansformer}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {11324--11333}
}
QuietPrune: Query-Guided Early Token Pruning for Vision-Language Models: Tianxiao Gao,

Shanwei Zhao,

Shuo Fang,

Shiai Zhu,

Chenguang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Tianxiao and Zhao, Shanwei and Fang, Shuo and Zhu, Shiai and Ma, Chenguang},
    title     = {{QuietPrune}: Query-Guided Early Token Pruning for Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3553--3562}
}
b-CLIP: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment: Fatimah Zohra,

Chen Zhao,

Hani Itani,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Zohra_2026_CVPR,
    author    = {Zohra, Fatimah and Zhao, Chen and Itani, Hani and Ghanem, Bernard},
    title     = {{b-CLIP}: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {680--689}
}
Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning: Haonan Jia,

Shichao Dong,

Xin Dong,

Zenghui Sun,

Jin Wang,

Jinsong Lan,

Xiaoyong Zhu,

Bo Zheng,

Kaifu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Haonan and Dong, Shichao and Dong, Xin and Sun, Zenghui and Wang, Jin and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Zhang, Kaifu},
    title     = {Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {766--777}
}
Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction: Yuanbo Li,

Tianyang Xu,

Cong Hu,

Tao Zhou,

Xiaojun Wu,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiaojun and Kittler, Josef},
    title     = {Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1493--1502}
}
CC-VQA: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering: Yuyang Hong,

Jiaqi Gu,

Yujing Lou,

Lubin Fan,

Qi Yang,

Ying Wang,

Kun Ding,

Yue Wu,

Shiming Xiang,

Jieping Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
    author    = {Hong, Yuyang and Gu, Jiaqi and Lou, Yujing and Fan, Lubin and Yang, Qi and Wang, Ying and Ding, Kun and Wu, Yue and Xiang, Shiming and Ye, Jieping},
    title     = {{CC-VQA}: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {5232--5241}
}
OneOcc: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera: Hao Shi,

Ze Wang,

Shangwei Guo,

Mengfei Duan,

Song Wang,

Teng Chen,

Kailun Yang,

Lin Wang,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Hao and Wang, Ze and Guo, Shangwei and Duan, Mengfei and Wang, Song and Chen, Teng and Yang, Kailun and Wang, Lin and Wang, Kaiwei},
    title     = {{OneOcc}: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14229--14240}
}
ModularAgent: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models: Yu-Wei Zhan,

Xin Wang,

Pengzhe Mao,

Tongtong Feng,

Ren Wang,

Wenwu Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
    author    = {Zhan, Yu-Wei and Wang, Xin and Mao, Pengzhe and Feng, Tongtong and Wang, Ren and Zhu, Wenwu},
    title     = {{ModularAgent}: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8087--8096}
}
Bridging Domains through Subspace-Aware Model Merging: Levy Chaves,

Chao Zhou,

Rebekka Burkholz,

Eduardo Valle,

Sandra Avila; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaves_2026_CVPR,
    author    = {Chaves, Levy and Zhou, Chao and Burkholz, Rebekka and Valle, Eduardo and Avila, Sandra},
    title     = {Bridging Domains through Subspace-Aware Model Merging},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {7990--7999}
}
Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields: Berthy T. Feng,

Andrew A. Chael,

David Bromley,

Aviad Levis,

William T. Freeman,

Katherine L. Bouman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Berthy T. and Chael, Andrew A. and Bromley, David and Levis, Aviad and Freeman, William T. and Bouman, Katherine L.},
    title     = {Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {12511--12521}
}
Spatial-Aware VLA Pretraining through Visual-Physical Alignment from Human Videos: Yicheng Feng,

Wanpeng Zhang,

Ye Wang,

Hao Luo,

Haoqi Yuan,

Sipeng Zheng,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Yicheng and Zhang, Wanpeng and Wang, Ye and Luo, Hao and Yuan, Haoqi and Zheng, Sipeng and Lu, Zongqing},
    title     = {Spatial-Aware {VLA} Pretraining through Visual-Physical Alignment from Human Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {712--723}
}
Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations: Chao Wang,

Chengan Che,

Xinyue Chen,

Sophia Tsoka,

Luis C. Garcia-Peraza-Herrera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Chao and Che, Chengan and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.},
    title     = {Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9921--9931}
}
Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model: Dongwon Kim,

Gawon Seo,

Jinsung Lee,

Minsu Cho,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Dongwon and Seo, Gawon and Lee, Jinsung and Cho, Minsu and Kwak, Suha},
    title     = {Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8183--8193}
}
Vision-Speech Models: Teaching Speech Models to Converse about Images: Amélie Royer,

Moritz Böhle,

Laurent Mazaré,

Neil Zeghidour,

Alexandre Défossez,

Patrick Pérez; [pdf] [supp]
[bibtex]
@InProceedings{Royer_2026_CVPR,
    author    = {Royer, Am{\'e}lie and B{\"o}hle, Moritz and Mazar{\'e}, Laurent and Zeghidour, Neil and D{\'e}fossez, Alexandre and P{\'e}rez, Patrick},
    title     = {Vision-Speech Models: Teaching Speech Models to Converse about Images},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1706--1715}
}
Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation: Yongchan Chun,

Chanhee Park,

Jeongho Yoon,

Jaehyung Seo,

Heuiseok Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chun_2026_CVPR,
    author    = {Chun, Yongchan and Park, Chanhee and Yoon, Jeongho and Seo, Jaehyung and Lim, Heuiseok},
    title     = {Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {6157--6166}
}
SplitFlux: Learning to Decouple Content and Style from a Single Image: Yitong Yang,

Yinglin Wang,

Changshuo Wang,

Yongjun Zhang,

Ziyang Chen,

Shuting He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yitong and Wang, Yinglin and Wang, Changshuo and Zhang, Yongjun and Chen, Ziyang and He, Shuting},
    title     = {{SplitFlux}: Learning to Decouple Content and Style from a Single Image},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {572--582}
}
CFG-Ctrl: Control-Based Classifier-Free Diffusion Guidance: Hanyang Wang,

Yiyang Liu,

Jiawei Chi,

Fangfu Liu,

Ran Xue,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Hanyang and Liu, Yiyang and Chi, Jiawei and Liu, Fangfu and Xue, Ran and Duan, Yueqi},
    title     = {{CFG-Ctrl}: Control-Based Classifier-Free Diffusion Guidance},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {11437--11447}
}
FlowComposer: Composable Flows for Compositional Zero-Shot Learning: Zhenqi He,

Lin Li,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Zhenqi and Li, Lin and Chen, Long},
    title     = {{FlowComposer}: Composable Flows for Compositional Zero-Shot Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {12396--12405}
}
XPaintNet: An eXtreme Lightweight Framework for Stereoscopic Conversion without Inpainting Network: Kihwan Yoon,

Juyeon Shin,

Jungheum Kang,

Sijung Kim,

Minyong Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
    author    = {Yoon, Kihwan and Shin, Juyeon and Kang, Jungheum and Kim, Sijung and Jeon, Minyong},
    title     = {{XPaintNet}: An {eXtreme} Lightweight Framework for Stereoscopic Conversion without Inpainting Network},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {5762--5771}
}
Why Does RL Generalize Better Than SFT? A Data-Centric Perspective on VLM Post-Training: Aojun Lu,

Tao Feng,

Hangjie Yuan,

Wei Li,

Yanan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Aojun and Feng, Tao and Yuan, Hangjie and Li, Wei and Sun, Yanan},
    title     = {Why Does {RL} Generalize Better Than {SFT}? {A} Data-Centric Perspective on {VLM} Post-Training},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {4761--4771}
}
Cross-Hand Latent Representation for Vision-Language-Action Models: Guangqi Jiang,

Yutong Liang,

Jianglong Ye,

Jia-Yang Huang,

Changwei Jing,

Rocky Duan,

Pieter Abbeel,

Xiaolong Wang,

Xueyan Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Guangqi and Liang, Yutong and Ye, Jianglong and Huang, Jia-Yang and Jing, Changwei and Duan, Rocky and Abbeel, Pieter and Wang, Xiaolong and Zou, Xueyan},
    title     = {Cross-Hand Latent Representation for Vision-Language-Action Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {13496--13507}
}
NoOVD: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection: Yupeng Zhang,

Ruize Han,

Zhiwei Chen,

Wei Feng,

Liang Wan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yupeng and Han, Ruize and Chen, Zhiwei and Feng, Wei and Wan, Liang},
    title     = {{NoOVD}: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {6304--6313}
}
POGA: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval: Junfeng Zhang,

Zhe Xue,

Yuankai Qi,

Junping Du,

Xiangyang Kong,

Yishuo Yan,

Amin Beheshti,

Jian Yang,

Anton van den Hengel,

Ming-Hsuan Yang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Junfeng and Xue, Zhe and Qi, Yuankai and Du, Junping and Kong, Xiangyang and Yan, Yishuo and Beheshti, Amin and Yang, Jian and van den Hengel, Anton and Yang, Ming-Hsuan},
    title     = {{POGA}: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {2735--2745}
}
PatchAlign3D: Local Feature Alignment for Dense 3D Shape Understanding: Souhail Hadgi,

Bingchen Gong,

Ramana Sundararaman,

Emery Pierson,

Lei Li,

Peter Wonka,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hadgi_2026_CVPR,
    author    = {Hadgi, Souhail and Gong, Bingchen and Sundararaman, Ramana and Pierson, Emery and Li, Lei and Wonka, Peter and Ovsjanikov, Maks},
    title     = {{PatchAlign3D}: Local Feature Alignment for Dense {3D} Shape Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3014--3023}
}
TACO: Task-Aware Contrastive Learning for Joint LiDAR Localization and 3D Object Detection: Leyuan Xing,

Huanjia Zhang,

Dongyu Pan,

Hai Wu,

Qiming Xia,

Kezheng Xiong,

Wen Li,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2026_CVPR,
    author    = {Xing, Leyuan and Zhang, Huanjia and Pan, Dongyu and Wu, Hai and Xia, Qiming and Xiong, Kezheng and Li, Wen and Wen, Chenglu and Wang, Cheng},
    title     = {{TACO}: Task-Aware Contrastive Learning for Joint {LiDAR} Localization and {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9965--9975}
}
SAM2Text: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation: Jing-Yao Zhang,

Heng Zhang,

Mingsen Zhang,

Binbin Yang,

Fei Yin; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jing-Yao and Zhang, Heng and Zhang, Mingsen and Yang, Binbin and Yin, Fei},
    title     = {{SAM2Text}: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3825--3834}
}
UniAVGen: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions: Guozhen Zhang,

Zixiang Zhou,

Teng Hu,

Ziqiao Peng,

Youliang Zhang,

Yi Chen,

Yuan Zhou,

Qinglin Lu,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Guozhen and Zhou, Zixiang and Hu, Teng and Peng, Ziqiao and Zhang, Youliang and Chen, Yi and Zhou, Yuan and Lu, Qinglin and Wang, Limin},
    title     = {{UniAVGen}: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1950--1960}
}
Egocentric Visibility-Aware Human Pose Estimation: Peng Dai,

Yu Zhang,

Feng Yiqiang,

Zhen Fan,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
    author    = {Dai, Peng and Zhang, Yu and Yiqiang, Feng and Fan, Zhen and Zhang, Yang},
    title     = {Egocentric Visibility-Aware Human Pose Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {7047--7057}
}
VesMamba: 3D Pulmonary Vessel Segmentation from CT images via Mamba with Structural Perception and Scale-aware Filtering: Zhipeng Liu,

Guilian Chen,

Zheng Jiang,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhipeng and Chen, Guilian and Jiang, Zheng and Wu, Huisi and Qin, Jing},
  title     = {{VesMamba}: {3D} Pulmonary Vessel Segmentation from {CT} images via {Mamba} with Structural Perception and Scale-aware Filtering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1439--1449},
}
Visual Grounding for Object Questions: Martin Nicolas Everaert,

Xiruo Liu,

Hiroyuki Takeda,

Raja Bala,

Vivek Yadav,

Vidya Narayanan; [pdf] [supp]
[bibtex]
@InProceedings{Everaert_2026_CVPR,
  author    = {Everaert, Martin Nicolas and Liu, Xiruo and Takeda, Hiroyuki and Bala, Raja and Yadav, Vivek and Narayanan, Vidya},
  title     = {Visual Grounding for Object Questions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11966--11975},
}
HySeg: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation: Jie Qiu,

Xin Li,

Fan Yang,

Yan Wang,

Dong Yu,

Changying Wang,

Linwei Dai,

Yongxiang Chen,

Youqin Chen,

Jianzhang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Jie and Li, Xin and Yang, Fan and Wang, Yan and Yu, Dong and Wang, Changying and Dai, Linwei and Chen, Yongxiang and Chen, Youqin and Chen, Jianzhang},
  title     = {{HySeg}: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6410--6420},
}
Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for VLMs: Lianyu Wang,

Meng Wang,

Huazhu Fu,

Daoqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang},
  title     = {Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6621--6630},
}
Agent4FaceForgery: Multi-Agent LLM Framework for Realistic Face Forgery Detection: Yingxin Lai,

Zitong YU,

Jun Wang,

Linlin Shen,

Yong Xu,

Xiaochun Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Yingxin and Yu, Zitong and Wang, Jun and Shen, Linlin and Xu, Yong and Cao, Xiaochun},
  title     = {{Agent4FaceForgery}: Multi-Agent {LLM} Framework for Realistic Face Forgery Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14073--14083},
}
D2FANet: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network: Qiang Qi,

Wenqi Shang,

Meifang Wang,

Xiao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Qiang and Shang, Wenqi and Wang, Meifang and Wang, Xiao},
  title     = {{D2FANet}: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11229--11239},
}
A Geometric Algebra-Informed 3DGS Framework for Wireless Channel Prediction: Jingzhou Shen,

Tianya Zhao,

Xuyu Wang; [pdf]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Jingzhou and Zhao, Tianya and Wang, Xuyu},
  title     = {A Geometric Algebra-Informed {3DGS} Framework for Wireless Channel Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4973--4982},
}
LoPrune: Efficient Data Pruning for LoRA-Based Fine-Tuning of Vision Transformer: Qiang He,

Yaozong Yang,

Kaibin Wang,

Ziteng Wei,

Feifei Chen,

Caslon Chua,

Yun Yang; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Qiang and Yang, Yaozong and Wang, Kaibin and Wei, Ziteng and Chen, Feifei and Chua, Caslon and Yang, Yun},
  title     = {{LoPrune}: Efficient Data Pruning for {LoRA}-Based Fine-Tuning of Vision Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5242--5252},
}
FlashLips: 100-FPS Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or GANs: Andreas Zinonos,

Michał Stypułkowski,

Antoni Bigata,

Stavros Petridis,

Maja Pantic,

Nikita Drobyshev; [pdf] [supp]
[bibtex]
@InProceedings{Zinonos_2026_CVPR,
  author    = {Zinonos, Andreas and Stypu{\l}kowski, Micha{\l} and Bigata, Antoni and Petridis, Stavros and Pantic, Maja and Drobyshev, Nikita},
  title     = {{FlashLips}: {100-FPS} Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or {GANs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10898--10908},
}
Bridging Domain Expertise and Generalization for Performance Estimation: Shuxuan Li,

Zhilin Zhao,

Quyu Kong,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuxuan and Zhao, Zhilin and Kong, Quyu and Zheng, Wei-Shi},
  title     = {Bridging Domain Expertise and Generalization for Performance Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7967--7977},
}
VVS: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping: Haotian Dong,

Ye Li,

Rongwei Lu,

Chen Tang,

Shu-Tao Xia,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Haotian and Li, Ye and Lu, Rongwei and Tang, Chen and Xia, Shu-Tao and Wang, Zhi},
  title     = {{VVS}: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12173--12182},
}
Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability: Tuomas Oikarinen,

Ge Yan,

Akshay Kulkarni,

Tsui-Wei Weng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oikarinen_2026_CVPR,
  author    = {Oikarinen, Tuomas and Yan, Ge and Kulkarni, Akshay and Weng, Tsui-Wei},
  title     = {Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2885--2894},
}
NOVA: Sparse Control, Dense Synthesis for Pair-Free Video Editing: Tianlin Pan,

Jiayi Dai,

Chenpu Yuan,

Zhengyao Lv,

Binxin Yang,

Hubery Yin,

Chen Li,

Jing Lyu,

Caifeng Shan,

Chenyang Si; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Tianlin and Dai, Jiayi and Yuan, Chenpu and Lv, Zhengyao and Yang, Binxin and Yin, Hubery and Li, Chen and Lyu, Jing and Shan, Caifeng and Si, Chenyang},
  title     = {{NOVA}: Sparse Control, Dense Synthesis for Pair-Free Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1917--1927},
}
ChimeraLoRA: Multi-Head LoRA-Guided Synthetic Datasets: Hoyoung Kim,

Minwoo Jang,

Jabin Koo,

Sangdoo Yun,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hoyoung and Jang, Minwoo and Koo, Jabin and Yun, Sangdoo and Ok, Jungseul},
  title     = {{ChimeraLoRA}: Multi-Head {LoRA}-Guided Synthetic Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9063--9073},
}
CREval: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions: Chonghuinan Wang,

Zihan Chen,

Yuxiang Wei,

Tianyi Jiang,

Xiaohe Wu,

Fan Li,

Wangmeng Zuo,

Hongxun Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chonghuinan and Chen, Zihan and Wei, Yuxiang and Jiang, Tianyi and Wu, Xiaohe and Li, Fan and Zuo, Wangmeng and Yao, Hongxun},
  title     = {{CREval}: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9029--9039},
}
MedGRPO: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding: Yuhao Su,

Anwesa Choudhuri,

Zhongpai Gao,

Benjamin Planche,

Van Nguyen Nguyen,

Meng Zheng,

Yuhan Shen,

Arun Innanje,

Terrence Chen,

Ehsan Elhamifar,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Yuhao and Choudhuri, Anwesa and Gao, Zhongpai and Planche, Benjamin and Nguyen, Van Nguyen and Zheng, Meng and Shen, Yuhan and Innanje, Arun and Chen, Terrence and Elhamifar, Ehsan and Wu, Ziyan},
  title     = {{MedGRPO}: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2788--2798},
}
Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic: Wanying Qu,

Jianxiong Gao,

Wei Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Wanying and Gao, Jianxiong and Wang, Wei and Fu, Yanwei},
  title     = {Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6994--7002},
}
Score2Instruct: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring: Qizhi Xie,

Kun Yuan,

Yunpeng Qu,

Jiachao Gong,

Mingda Wu,

Ming Sun,

Chao Zhou,

Jihong Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Gong, Jiachao and Wu, Mingda and Sun, Ming and Zhou, Chao and Zhu, Jihong},
  title     = {{Score2Instruct}: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11426--11436},
}
Pano360: Perspective to Panoramic Vision with Geometric Consistency: Zhengdong Zhu,

Weiyi Xue,

Zuyuan Yang,

Wenlve Zhou,

Zhiheng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengdong and Xue, Weiyi and Yang, Zuyuan and Zhou, Wenlve and Zhou, Zhiheng},
  title     = {{Pano360}: Perspective to Panoramic Vision with Geometric Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7600--7609},
}
Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective: Kaifang Long,

Lianbo Ma,

Jiaqi Liu,

Liming Liu,

Guoyang Xie; [pdf] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Kaifang and Ma, Lianbo and Liu, Jiaqi and Liu, Liming and Xie, Guoyang},
  title     = {Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14116--14125},
}
NeuroFlow: Toward Unified Visual Encoding and Decoding from Neural Activity: Weijian Mai,

Mu Nan,

Yu Zhu,

Jiahang Cao,

Rui Zhang,

Yuqin Dai,

Chunfeng Song,

Andrew Luo,

Jiamin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Weijian and Nan, Mu and Zhu, Yu and Cao, Jiahang and Zhang, Rui and Dai, Yuqin and Song, Chunfeng and Luo, Andrew and Wu, Jiamin},
  title     = {{NeuroFlow}: Toward Unified Visual Encoding and Decoding from Neural Activity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12966--12976},
}
PhaseWin Search Framework Enable Efficient Object-Level Interpretation: Zihan Gu,

Ruoyu Chen,

Junchi Zhang,

Yue Hu,

Hua Zhang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zihan and Chen, Ruoyu and Zhang, Junchi and Hu, Yue and Zhang, Hua and Cao, Xiaochun},
  title     = {{PhaseWin} Search Framework Enable Efficient Object-Level Interpretation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2875--2884},
}
FedAFD: Multimodal Federated Learning via Adversarial Fusion and Distillation: Min Tan,

Junchao Ma,

Yinfu Feng,

Jiajun Ding,

Wenwen Pan,

Tingting Han,

Qian Zheng,

Zhenzhong Kuang,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Min and Ma, Junchao and Feng, Yinfu and Ding, Jiajun and Pan, Wenwen and Han, Tingting and Zheng, Qian and Kuang, Zhenzhong and Yu, Zhou},
  title     = {{FedAFD}: Multimodal Federated Learning via Adversarial Fusion and Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3400--3409},
}
ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation: Wei Li,

Jizhihui Liu,

Li Yixing,

Junwen Tong,

Rui Shao,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wei and Liu, Jizhihui and Li, Yixing and Tong, Junwen and Shao, Rui and Nie, Liqiang},
  title     = {{ConsisVLA-4D}: Advancing Spatiotemporal Consistency in Efficient {3D}-Perception and {4D}-Reasoning for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6706--6717},
}
ActiveAD: Planning-Oriented Active Learning for End-to-End Autonomous Driving: Han Lu,

Xiaosong Jia,

Yichen Xie,

Siyu Sun,

Wenlong Liao,

Xiaokang Yang,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Han and Jia, Xiaosong and Xie, Yichen and Sun, Siyu and Liao, Wenlong and Yang, Xiaokang and Yan, Junchi},
  title     = {{ActiveAD}: Planning-Oriented Active Learning for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3656--3666},
}
Plug-and-Play Incomplete Multi-View Clustering via Janus-Faced Affinity Learning with Topology Harmonization: Shengju Yu,

Suyuan Liu,

Wenhao Shao,

Siwei Wang,

Ke Liang,

Xihong Yang,

Tiejun Li,

Xinwang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Shengju and Liu, Suyuan and Shao, Wenhao and Wang, Siwei and Liang, Ke and Yang, Xihong and Li, Tiejun and Liu, Xinwang},
  title     = {Plug-and-Play Incomplete Multi-View Clustering via {Janus}-Faced Affinity Learning with Topology Harmonization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3594--3603},
}
SHOW3D: Capturing Scenes of 3D Hands and Objects in the Wild: Patrick Rim,

Kevin Harris,

Braden Copple,

Shangchen Han,

Xu Xie,

Ivan Shugurov,

Sizhe An,

He Wen,

Alex Wong,

Tomas Hodan,

Kun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rim_2026_CVPR,
  author    = {Rim, Patrick and Harris, Kevin and Copple, Braden and Han, Shangchen and Xie, Xu and Shugurov, Ivan and An, Sizhe and Wen, He and Wong, Alex and Hodan, Tomas and He, Kun},
  title     = {{SHOW3D}: Capturing Scenes of {3D} Hands and Objects in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7111--7120},
}
Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment: Roy Amoyal,

Oren Freifeld,

Chaim Baskin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amoyal_2026_CVPR,
  author    = {Amoyal, Roy and Freifeld, Oren and Baskin, Chaim},
  title     = {Cross-Instance {Gaussian} Splatting Registration via Geometry-Aware Feature-Guided Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4993--5002},
}
MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis: Xiangyu Bai,

He Liang,

Bishoy Galoaa,

Utsav Nandi,

Shayda Moezzi,

Yuhang He,

Sarah Ostadabbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Xiangyu and Liang, He and Galoaa, Bishoy and Nandi, Utsav and Moezzi, Shayda and He, Yuhang and Ostadabbas, Sarah},
  title     = {{MoReGen}: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7632--7642},
}
DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers: Dahye Kim,

Deepti Ghadiyaram,

Raghudeep Gadde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Dahye and Ghadiyaram, Deepti and Gadde, Raghudeep},
  title     = {{DDiT}: Dynamic Patch Scheduling for Efficient Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11459--11471},
}
Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank: Yang Zhang,

Zhixiang Chi,

Xudong Yan,

Yang Wang,

Songhe Feng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yang and Chi, Zhixiang and Yan, Xudong and Wang, Yang and Feng, Songhe},
  title     = {Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5553--5563},
}
SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification: Huiyuan Huang,

Sang Min Yoon; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Huiyuan and Yoon, Sang Min},
  title     = {{SSM}-Aware Token-Efficient {VMamba} via Adaptive Patch Pruning and Merging for Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4363--4372},
}
CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction: Xianghui Xie,

Bowen Wen,

Yan Chang,

Hesam Rabeti,

Jiefeng Li,

Ye Yuan,

Gerard Pons-Moll,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Xianghui and Wen, Bowen and Chang, Yan and Rabeti, Hesam and Li, Jiefeng and Yuan, Ye and Pons-Moll, Gerard and Birchfield, Stan},
  title     = {{CARI4D}: Category Agnostic {4D} Reconstruction of Human-Object Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14006--14016},
}
SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation: Zixuan Pan,

Kaiyuan Tang,

Jun Xia,

Yifan Qin,

Lin Gu,

Chaoli Wang,

Jianxu Chen,

Yiyu Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zixuan and Tang, Kaiyuan and Xia, Jun and Qin, Yifan and Gu, Lin and Wang, Chaoli and Chen, Jianxu and Shi, Yiyu},
  title     = {{SGI}: Structured {2D} {Gaussians} for Efficient and Compact Large Image Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12162--12172},
}
Scalable Feature Matching via State Space Modeling and Sparse Correlation: Sin Wai Choo,

Bo Li; [pdf]
[bibtex]
@InProceedings{Choo_2026_CVPR,
  author    = {Choo, Sin Wai and Li, Bo},
  title     = {Scalable Feature Matching via State Space Modeling and Sparse Correlation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6685--6694},
}
Real-World Point Tracking with Verifier-Guided Pseudo-Labeling: Görkay Aydemir,

Fatma Güney,

Weidi Xie; [pdf] [supp]
[bibtex]
@InProceedings{Aydemir_2026_CVPR,
  author    = {Aydemir, G{\"o}rkay and G{\"u}ney, Fatma and Xie, Weidi},
  title     = {Real-World Point Tracking with Verifier-Guided Pseudo-Labeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13553--13562},
}
ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions: Xiaoxue Wu,

Xinyuan Chen,

Yaohui Wang,

Yu Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaoxue and Chen, Xinyuan and Wang, Yaohui and Qiao, Yu},
  title     = {{ShotDirector}: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2079--2089},
}
Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification: Qianhao Luo,

Jiajia Mi,

Mingtao Yan,

JingSheng Liu,

ShuYang Pang,

Weiling Li; [pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Qianhao and Mi, Jiajia and Yan, Mingtao and Liu, JingSheng and Pang, ShuYang and Li, Weiling},
  title     = {Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14137--14146},
}
Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset: Tsai-Ching Ni,

Cheng-Chi Chen,

Yuan-Fu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Tsai-Ching and Chen, Cheng-Chi and Yang, Yuan-Fu},
  title     = {Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13059--13068},
}
Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling: Jianbin Zhao,

Chaoran Feng,

Miao Yu,

Yingtao Li,

Zhenyu Tang,

Wangbo Yu,

Yian Zhao,

Xiaomin Li,

Li Yuan,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jianbin and Feng, Chaoran and Yu, Miao and Li, Yingtao and Tang, Zhenyu and Yu, Wangbo and Zhao, Yian and Li, Xiaomin and Yuan, Li and Tian, Yonghong},
  title     = {{Style-GRPO}: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12818--12828},
}
AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models: Shih-Po Lee,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Shih-Po and Elhamifar, Ehsan},
  title     = {{AXG-Reasoner}: Error Detection and Explanation in Long Task Videos with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3421--3431},
}
Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines: Yusen Cai,

Qing Lin,

Bhargava Satya Nunna,

Mengmi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Yusen and Lin, Qing and Nunna, Bhargava Satya and Zhang, Mengmi},
  title     = {Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13766--13780},
}
MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction: Jongmin Lee,

Seungyeop Kang,

Sungjoo Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jongmin and Kang, Seungyeop and Yoo, Sungjoo},
  title     = {{MV-RoMa}: From Pairwise Matching into Multi-View Track Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7446--7456},
}
AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization: Jiawei Lin,

Wanrong Zhu,

Vlad I Morariu,

Christopher Tensmeyer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiawei and Zhu, Wanrong and Morariu, Vlad I. and Tensmeyer, Christopher},
  title     = {{AnyDoc}: Enhancing Document Generation via Large-Scale {HTML/CSS} Data Synthesis and Height-Aware Reinforcement Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {626--635},
}
Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping: Duret Guillaume,

Danylo Mazurak,

Florence Zara,

Jan Peters,

Liming Chen; [pdf] [supp]
[bibtex]
@InProceedings{Guillaume_2026_CVPR,
  author    = {Duret, Guillaume and Mazurak, Danylo and Zara, Florence and Peters, Jan and Chen, Liming},
  title     = {Breaking the {3D} Dataset Bottleneck: Fast Scalable Generation of Aligned {3D} Assets from Scratch for Category {6D} Pose Estimation and Robotic Grasping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1809--1818},
}
Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation: Pingrui Zhang,

Yifei Su,

Pengyuan Wu,

Dong An,

Li Zhang,

Zhigang Wang,

Dong Wang,

Bin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Pingrui and Su, Yifei and Wu, Pengyuan and An, Dong and Zhang, Li and Wang, Zhigang and Wang, Dong and Zhao, Bin},
  title     = {Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1006--1019},
}
The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection: Anusha Acharya,

Hitesh Sapkota,

Qi Yu,

Xumin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Acharya_2026_CVPR,
  author    = {Acharya, Anusha and Sapkota, Hitesh and Yu, Qi and Liu, Xumin},
  title     = {The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14147--14156},
}
Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow: Shimin Hu,

Yuanyi Wei,

Fei Zha,

Yudong Guo,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Shimin and Wei, Yuanyi and Zha, Fei and Guo, Yudong and Zhang, Juyong},
  title     = {{Easy3E}: Feed-Forward {3D} Asset Editing via Rectified Voxel Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12730--12740},
}
Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift: Till Beemelmanns,

Alexey Nekrasov,

Stefan Vilceanu,

Jonas Steinhaus,

Timo Woopen,

Bastian Leibe,

Lutz Eckstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beemelmanns_2026_CVPR,
  author    = {Beemelmanns, Till and Nekrasov, Alexey and Vilceanu, Stefan and Steinhaus, Jonas and Woopen, Timo and Leibe, Bastian and Eckstein, Lutz},
  title     = {{Query2Uncertainty}: Robust Uncertainty Quantification and Calibration for {3D} Object Detection under Distribution Shift},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4677--4686},
}
PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency: Leezy Han,

Seunggyu Kim,

Dongseok Shim,

Hyeonbeom Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Leezy and Kim, Seunggyu and Shim, Dongseok and Lee, Hyeonbeom},
  title     = {{PTC-Depth}: Pose-Refined Monocular Depth Estimation with Temporal Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12617--12627},
}
DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization: Siran Peng,

Haoyuan Zhang,

Li Gao,

Tianshuo Zhang,

Xiangyu Zhu,

Bao Li,

Weisong Zhao,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Siran and Zhang, Haoyuan and Gao, Li and Zhang, Tianshuo and Zhu, Xiangyu and Li, Bao and Zhao, Weisong and Lei, Zhen},
  title     = {{DiffusionFF}: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14095--14105},
}
CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation: Jeongbin Hong,

Dooseop Choi,

Taeg-Hyun An,

Kyounghwan An,

Kyoung-Wook Min; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Jeongbin and Choi, Dooseop and An, Taeg-Hyun and An, Kyounghwan and Min, Kyoung-Wook},
  title     = {{CycleBEV}: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10986--10995},
}
Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge: Yu Huang,

Zelin Peng,

Changsong Wen,

Xiaokang Yang,

Wei Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yu and Peng, Zelin and Wen, Changsong and Yang, Xiaokang and Shen, Wei},
  title     = {Unlocking {3D} Affordance Segmentation with {2D} Semantic Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6399--6409},
}
Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection: Dahu Shi,

Chengshen He,

Shaochen Zhang,

Bo Qian,

Xiaochen Quan,

Wencong Zhang,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Dahu and He, Chengshen and Zhang, Shaochen and Qian, Bo and Quan, Xiaochen and Zhang, Wencong and Wei, Xing},
  title     = {{Omni-AD}: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14157--14166},
}
GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation: Tianchen Deng,

Xuefeng Chen,

Yi Chen,

Qu Chen,

Yuyao Xu,

Lijin Yang,

Le Xu,

Yu Zhang,

Bo Zhang,

Wuxiong Huang,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Tianchen and Chen, Xuefeng and Chen, Yi and Chen, Qu and Xu, Yuyao and Yang, Lijin and Xu, Le and Zhang, Yu and Zhang, Bo and Huang, Wuxiong and Wang, Hesheng},
  title     = {{GaussianDWM}: {3D} {Gaussian} Driving World Model for Unified Scene Understanding and Multi-Modal Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10656--10667},
}
Watch and Learn: Learning to Use Computers from Online Videos: Chan Hee Song,

Yiwen Song,

Palash Goyal,

Yu Su,

Oriana Riva,

Hamid Palangi,

Tomas Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Chan Hee and Song, Yiwen and Goyal, Palash and Su, Yu and Riva, Oriana and Palangi, Hamid and Pfister, Tomas},
  title     = {Watch and Learn: Learning to Use Computers from Online Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5421--5431},
}
DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion: Eungi Lee,

Seung-hyeok Back,

Hyung-Il Kim,

Seok Bong Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Eungi and Back, Seung-hyeok and Kim, Hyung-Il and Yoo, Seok Bong},
  title     = {{DeepProtect}: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6569--6579},
}
R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment: Junhyuk Seo,

Sanghyuk Seo,

Dawoon Kim,

Heeseok Oh; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Junhyuk and Seo, Sanghyuk and Kim, Dawoon and Oh, Heeseok},
  title     = {{R3-PCQA}: Ray-Reprojection-Reinforcement for No-Reference {3D} Point Cloud Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9996--10005},
}
Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering: Yalan Qin,

Hanzhou Wu; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yalan and Wu, Hanzhou},
  title     = {Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1616--1626},
}
DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving: Zhenjie Yang,

Yilin Chai,

Xiaosong Jia,

Qifeng Li,

Yuqian Shao,

Xuekai Zhu,

Haisheng Su,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhenjie and Chai, Yilin and Jia, Xiaosong and Li, Qifeng and Shao, Yuqian and Zhu, Xuekai and Su, Haisheng and Yan, Junchi},
  title     = {{DriveMoE}: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10678--10688},
}
Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures: Zeyao Liu,

Zhendong Zhao,

Xiaojun Chen,

Xin Zhao,

Yuexin Xuan,

Xiaoshuang Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zeyao and Zhao, Zhendong and Chen, Xiaojun and Zhao, Xin and Xuan, Yuexin and Ji, Xiaoshuang},
  title     = {Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13376--13385},
}
Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation: Yiwen Tang,

Zoey Guo,

Kaixin Zhu,

Ray Zhang,

Qizhi Chen,

Dongzhi Jiang,

Junli Liu,

Bohan Zeng,

Haoming Song,

Delin Qu,

Tianyi Bai,

Dan Xu,

Wentao Zhang,

Bin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yiwen and Guo, Zoey and Zhu, Kaixin and Zhang, Ray and Chen, Qizhi and Jiang, Dongzhi and Liu, Junli and Zeng, Bohan and Song, Haoming and Qu, Delin and Bai, Tianyi and Xu, Dan and Zhang, Wentao and Zhao, Bin},
  title     = {Are We Ready for {RL} in Text-to-{3D} Generation? {A} Progressive Investigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3197--3207},
}
Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding: Yulong Liu,

Hua Xu,

Yiyang Cai,

Chunyang Jiang,

Sirui Han,

Yike Guo; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yulong and Xu, Hua and Cai, Yiyang and Jiang, Chunyang and Han, Sirui and Guo, Yike},
  title     = {Modeling the Brain's Grammar: {ROI}-Guided {fMRI} Pretraining for Transferable and Interpretable Vision Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6900--6909},
}
OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation: Yoonjin Oh,

Yongjin Kim,

Hyomin Kim,

Donghwan Chi,

Sungwoong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Yoonjin and Kim, Yongjin and Kim, Hyomin and Chi, Donghwan and Kim, Sungwoong},
  title     = {{OSPO}: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7620--7631},
}
Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding: Jialuo Li,

Bin Li,

Jiahao Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jialuo and Li, Bin and Li, Jiahao and Lu, Yan},
  title     = {Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11369--11380},
}
Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models: Xinyang Wang,

Kecheng Zheng,

Minfeng Zhu,

Wei Wu,

Fan Lu,

Wei Zhai,

Wei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinyang and Zheng, Kecheng and Zhu, Minfeng and Wu, Wei and Lu, Fan and Zhai, Wei and Chen, Wei},
  title     = {Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2357--2368},
}
Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure: Ziling Wang,

Shuya Yang,

Jialin Lu,

Ka-Ho Chow; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziling and Yang, Shuya and Lu, Jialin and Chow, Ka-Ho},
  title     = {Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10293--10302},
}
RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval: Yijiang Li,

Kunal Kotian,

Ali Marjaninejad,

Meir Friedenberg,

Kaushik Pavani,

Sunny Dasgupta; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yijiang and Kotian, Kunal and Marjaninejad, Ali and Friedenberg, Meir and Pavani, Kaushik and Dasgupta, Sunny},
  title     = {{RMIR}: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2725--2734},
}
MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation: Jiale Xu,

Wang Zhao,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiale and Zhao, Wang and Shan, Ying},
  title     = {{MeshWeaver}: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5912--5922},
}
ReMoT: Reinforcement Learning with Motion Contrast Triplets: Cong Wan,

Zeyu Guo,

Jiangyang Li,

Songlin Dong,

Yifan Bai,

Lin Peng,

Zhiheng Ma,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
  author    = {Wan, Cong and Guo, Zeyu and Li, Jiangyang and Dong, Songlin and Bai, Yifan and Peng, Lin and Ma, Zhiheng and Gong, Yihong},
  title     = {{ReMoT}: Reinforcement Learning with Motion Contrast Triplets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5487--5498},
}
LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models: Soumyaratna Debnath,

Bui Duc Manh,

Zinan Liu,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Debnath_2026_CVPR,
  author    = {Debnath, Soumyaratna and Manh, Bui Duc and Liu, Zinan and Wang, Lin},
  title     = {{LLMind}: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3133--3142},
}
TextOVSR: Text-Guided Real-World Opera Video Super-Resolution: Hua Chang,

Xin Xu,

Wei Liu,

Jiayi Wu,

Kui Jiang,

Fei Ma,

Qi Tian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Hua and Xu, Xin and Liu, Wei and Wu, Jiayi and Jiang, Kui and Ma, Fei and Tian, Qi},
  title     = {{TextOVSR}: Text-Guided Real-World Opera Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2156--2165},
}
Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories: Junyao Hu,

Zhongwei Cheng,

Waikeung Wong,

Xingxing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Junyao and Cheng, Zhongwei and Wong, Waikeung and Zou, Xingxing},
  title     = {{Garments2Look}: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1123--1133},
}
Adaptive Capacity Autoregressive Visual Tracking: Tong Lin,

Yifan Bai,

Shiyi Liang,

Ruigang Niu,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Tong and Bai, Yifan and Liang, Shiyi and Niu, Ruigang and Wei, Xing},
  title     = {Adaptive Capacity Autoregressive Visual Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13574--13583},
}
Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching: Guangxun Zhang,

Mason Haberle,

Davi Geiger; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guangxun and Haberle, Mason and Geiger, Davi},
  title     = {Stable Mean Flow: {Lyapunov}-Inspired One-Step Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9223--9232},
}
Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues: Wenjin Hou,

Xiaoxiao Sun,

Hehe Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Wenjin and Sun, Xiaoxiao and Fan, Hehe},
  title     = {Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5499--5510},
}
3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image: Ze-Xin Yin,

Liu Liu,

Xinjie Wang,

Wei Sui,

Zhizhong Su,

Jian Yang,

Jin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Ze-Xin and Liu, Liu and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Yang, Jian and Xie, Jin},
  title     = {{3D-Fixer}: Coarse-to-Fine In-place Completion for {3D} Scenes from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12753--12763},
}
Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding: Yubo Jiang,

Yitong An,

Xin Yang,

Abudukelimu Wuerkaixi,

Xuxin Cheng,

Fengying Xie,

Zhiguo Jiang,

Cao Liu,

Ke Zeng,

Haopeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yubo and An, Yitong and Yang, Xin and Wuerkaixi, Abudukelimu and Cheng, Xuxin and Xie, Fengying and Jiang, Zhiguo and Liu, Cao and Zeng, Ke and Zhang, Haopeng},
  title     = {Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4210--4220},
}
CrowdGaussian: Reconstructing High-Fidelity 3D Gaussians for Human Crowd from a Single Image: Yizheng Song,

Yiyu Zhuang,

Qipeng Xu,

Haixiang Wang,

Jiahe Zhu,

Jing Tian,

Siyu Zhu,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Yizheng and Zhuang, Yiyu and Xu, Qipeng and Wang, Haixiang and Zhu, Jiahe and Tian, Jing and Zhu, Siyu and Zhu, Hao},
  title     = {{CrowdGaussian}: Reconstructing High-Fidelity {3D} {Gaussians} for Human Crowd from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11006--11016},
}
EffectErase: Joint Video Object Removal and Insertion for High-Quality Effect Erasing: Yang Fu,

Yike Zheng,

Ziyun Dai,

Henghui Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yang and Zheng, Yike and Dai, Ziyun and Ding, Henghui},
  title     = {{EffectErase}: Joint Video Object Removal and Insertion for High-Quality Effect Erasing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2005--2014},
}
SOUPLE: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts: Khanh Binh Nguyen,

Chae Jung Park; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Khanh Binh and Park, Chae Jung},
  title     = {{SOUPLE}: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8674--8683},
}
ReDirector: Creating Any-Length Video Retakes with Rotary Camera Encoding: Byeongjun Park,

Byung-Hoon Kim,

Hyungjin Chung,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Byeongjun and Kim, Byung-Hoon and Chung, Hyungjin and Ye, Jong Chul},
  title     = {{ReDirector}: Creating Any-Length Video Retakes with Rotary Camera Encoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11163--11173},
}
Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection: Xu Zhang,

Zhe Chen,

Jing Zhang,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xu and Chen, Zhe and Zhang, Jing and Tao, Dacheng},
  title     = {Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10082--10092},
}
SoC: Semantic Orthogonal Calibration for Test-Time Prompt Tuning: Leo Fillioux,

Omprakash Chakraborty,

Ismail Ben Ayed,

Paul-Henry Cournède,

Stergios Christodoulidis,

Maria Vakalopoulou,

Jose Dolz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fillioux_2026_CVPR,
  author    = {Fillioux, Leo and Chakraborty, Omprakash and Ben Ayed, Ismail and Courn{\`e}de, Paul-Henry and Christodoulidis, Stergios and Vakalopoulou, Maria and Dolz, Jose},
  title     = {{SoC}: Semantic Orthogonal Calibration for Test-Time Prompt Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4772--4782},
}
Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency: Guangyan Chen,

Qi Shao,

Te Cui,

Zichen Zhou,

Weixin Mao,

Luojie Yang,

Meiling Wang,

Yi Yang,

Hua Chen,

Yufeng Yue; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Guangyan and Shao, Qi and Cui, Te and Zhou, Zichen and Mao, Weixin and Yang, Luojie and Wang, Meiling and Yang, Yi and Chen, Hua and Yue, Yufeng},
  title     = {Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12871--12880},
}
Language-Free Generative Editing from One Visual Example: Omar Elezabi,

Eduard Zamfir,

Zongwei Wu,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elezabi_2026_CVPR,
  author    = {Elezabi, Omar and Zamfir, Eduard and Wu, Zongwei and Timofte, Radu},
  title     = {Language-Free Generative Editing from One Visual Example},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1078--1088},
}
EmoTaG: Emotion-Aware Talking Head Synthesis on Gaussian Splatting with Few-Shot Personalization: Haolan Xu,

Keli Cheng,

Lei Wang,

Ning Bi,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Haolan and Cheng, Keli and Wang, Lei and Bi, Ning and Liu, Xiaoming},
  title     = {{EmoTaG}: Emotion-Aware Talking Head Synthesis on {Gaussian} Splatting with Few-Shot Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10921--10931},
}
Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species: Jinyu Xu,

Tianqi Hu,

Xiaonan Hu,

Letian Zhou,

Songliang Cao,

Meng Zhang,

Hao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jinyu and Hu, Tianqi and Hu, Xiaonan and Zhou, Letian and Cao, Songliang and Zhang, Meng and Lu, Hao},
  title     = {Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {167--177},
}
Heterogeneous Decentralized Diffusion Models: Zhiying Jiang,

Raihan Seraj,

Marcos Villagra,

Bidhan Roy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zhiying and Seraj, Raihan and Villagra, Marcos and Roy, Bidhan},
  title     = {Heterogeneous Decentralized Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2391--2400},
}
Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning: Qi Wang,

Mian Wu,

Yuyang Zhang,

Mingqi Yuan,

Wenyao Zhang,

Haoxiang You,

Yunbo Wang,

Xin Jin,

Xiaokang Yang,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qi and Wu, Mian and Zhang, Yuyang and Yuan, Mingqi and Zhang, Wenyao and You, Haoxiang and Wang, Yunbo and Jin, Xin and Yang, Xiaokang and Zeng, Wenjun},
  title     = {Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8077--8086},
}
Enhancing Hands in 3D Whole-Body Pose Estimation with Conditional Hands Modulator: Gyeongsik Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Gyeongsik},
  title     = {Enhancing Hands in {3D} Whole-Body Pose Estimation with Conditional Hands Modulator},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8891--8900},
}
GeniNav: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching: Yuqi Chen,

Junjie Gao,

Yongzhou Pan,

Siyuan Song,

Zixuan Zhang,

Jiaping Xiao,

Mir Feroskhan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuqi and Gao, Junjie and Pan, Yongzhou and Song, Siyuan and Zhang, Zixuan and Xiao, Jiaping and Feroskhan, Mir},
  title     = {{GeniNav}: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {996--1005},
}
CineSRD: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization: Liangbin Huang,

Xiaohua Liao,

Chaoqun Cui,

Shijing Wang,

Zhaolong Huang,

Yanlong Du,

Wenji Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Liangbin and Liao, Xiaohua and Cui, Chaoqun and Wang, Shijing and Huang, Zhaolong and Du, Yanlong and Mao, Wenji},
  title     = {{CineSRD}: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8836--8845},
}
Learning Personalized Photographic Style from Pairwise User Preferences: Jinwoo Kim,

Jihye Yoo,

Seon Joo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jinwoo and Yoo, Jihye and Kim, Seon Joo},
  title     = {Learning Personalized Photographic Style from Pairwise User Preferences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1134--1144},
}
DA-Mamba: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection: Haochen Li,

Rui Zhang,

Hantao Yao,

Xin Zhang,

Yifan Hao,

Shaohui Peng,

Yongwei Zhao,

Ling Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Peng, Shaohui and Zhao, Yongwei and Li, Ling},
  title     = {{DA-Mamba}: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8000--8010},
}
Material Magic Wand: Material-Aware Grouping of 3D Parts in Untextured Meshes: Umangi Jain,

Vladimir Kim,

Matheus Gadelha,

Igor Gilitschenski,

Zhiqin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Umangi and Kim, Vladimir and Gadelha, Matheus and Gilitschenski, Igor and Chen, Zhiqin},
  title     = {Material Magic Wand: Material-Aware Grouping of {3D} Parts in Untextured Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6377--6387},
}
LEADER: Learning Reliable Local-to-Global Correspondences for LiDAR Relocalization: Jianshi Wu,

Minghang Zhu,

Dunqiang Liu,

Wen Li,

Sheng Ao,

Siqi Shen,

Chenglu Wen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jianshi and Zhu, Minghang and Liu, Dunqiang and Li, Wen and Ao, Sheng and Shen, Siqi and Wen, Chenglu and Wang, Cheng},
  title     = {{LEADER}: Learning Reliable Local-to-Global Correspondences for {LiDAR} Relocalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9932--9942},
}
SwiftVLA: Unlocking Spatiotemporal Dynamics for Lightweight VLA Models at Minimal Overhead: Chaojun Ni,

Cheng Chen,

Xiaofeng Wang,

Zheng Zhu,

Wenzhao Zheng,

Boyuan Wang,

Tianrun Chen,

Guosheng Zhao,

Haoyun Li,

Zhehao Dong,

Qiang Zhang,

Yun Ye,

Yang Wang,

Guan Huang,

Wenjun Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Chaojun and Chen, Cheng and Wang, Xiaofeng and Zhu, Zheng and Zheng, Wenzhao and Wang, Boyuan and Chen, Tianrun and Zhao, Guosheng and Li, Haoyun and Dong, Zhehao and Zhang, Qiang and Ye, Yun and Wang, Yang and Huang, Guan and Mei, Wenjun},
  title     = {{SwiftVLA}: Unlocking Spatiotemporal Dynamics for Lightweight {VLA} Models at Minimal Overhead},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13474--13485},
}
SpatiaLQA: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models: Yuechen Xie,

Xiaoyan Zhang,

Yicheng Shan,

Zhu Hao,

Rui Tang,

Rong Wei,

Mingli Song,

Yuanyu Wan,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yuechen and Zhang, Xiaoyan and Shan, Yicheng and Hao, Zhu and Tang, Rui and Wei, Rong and Song, Mingli and Wan, Yuanyu and Song, Jie},
  title     = {{SpatiaLQA}: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2645--2657},
}
Omni-MMSI: Toward Identity-attributed Social Interaction Understanding: Xinpeng Li,

Bolin Lai,

Hardy Chen,

Shijian Deng,

Cihang Xie,

Yuyin Zhou,

James M. Rehg,

Yapeng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinpeng and Lai, Bolin and Chen, Hardy and Deng, Shijian and Xie, Cihang and Zhou, Yuyin and Rehg, James M. and Tian, Yapeng},
  title     = {{Omni-MMSI}: Toward Identity-attributed Social Interaction Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8684--8696},
}
Unsupervised 3d Motion Estimation Using Event Camera: Han Han,

Wei Zhai,

Tiesong Zhao,

Bin Li,

Yang Cao,

Zheng-jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Han and Zhai, Wei and Zhao, Tiesong and Li, Bin and Cao, Yang and Zha, Zheng-jun},
  title     = {Unsupervised 3d Motion Estimation Using Event Camera},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8067--8076},
}
Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for RGB-Event Object Detection: Keyao Wang,

Shuai Liu,

Hengda Shi,

Lukui Shi,

Haiyong Chen; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Keyao and Liu, Shuai and Shi, Hengda and Shi, Lukui and Chen, Haiyong},
  title     = {Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for {RGB}-Event Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4415--4424},
}
HierUQ: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification: Yang Chu,

Xiaomeng Yang,

Keli Deng,

Yuntao Qian; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Yang and Yang, Xiaomeng and Deng, Keli and Qian, Yuntao},
  title     = {{HierUQ}: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11240--11249},
}
Stable Spike: Dual Consistency Optimization via Bitwise AND Operations for Spiking Neural Networks: Yongqi Ding,

Kunshan Yang,

Linze Li,

Yiyang Zhang,

Mengmeng Jing,

Lin Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Yongqi and Yang, Kunshan and Li, Linze and Zhang, Yiyang and Jing, Mengmeng and Zuo, Lin},
  title     = {Stable Spike: Dual Consistency Optimization via Bitwise {AND} Operations for Spiking Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {892--902},
}
Selfi: Self-improving Reconstruction Engine via 3D Geometric Feature Alignment: Youming Deng,

Songyou Peng,

Junyi Zhang,

Kathryn Heal,

Tiancheng Sun,

John Flynn,

Steve Marschner,

Lucy Chai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Youming and Peng, Songyou and Zhang, Junyi and Heal, Kathryn and Sun, Tiancheng and Flynn, John and Marschner, Steve and Chai, Lucy},
  title     = {Selfi: Self-improving Reconstruction Engine via {3D} Geometric Feature Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7351--7361},
}
Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models: Jinlong Li,

Liyuan Jiang,

Haonan Zhang,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinlong and Jiang, Liyuan and Zhang, Haonan and Sebe, Nicu},
  title     = {Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10451--10461},
}
Decoupling Defense Strategies for Robust Image Watermarking: Jiahui Chen,

Zehang Deng,

Zeyu Zhang,

Chaoyang Li,

Lianchen Jia,

Lifeng Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiahui and Deng, Zehang and Zhang, Zeyu and Li, Chaoyang and Jia, Lianchen and Sun, Lifeng},
  title     = {Decoupling Defense Strategies for Robust Image Watermarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3316--3325},
}
What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split DNNs: Zhihan Ren,

Lijun He,

Jiaxi Liang,

Xinzhu Fu,

Haixia Bi,

Fan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Zhihan and He, Lijun and Liang, Jiaxi and Fu, Xinzhu and Bi, Haixia and Li, Fan},
  title     = {What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split {DNNs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13366--13375},
}
Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation: Bin Liu,

Wei Sun,

Qianqian Wang,

Wei Feng,

Yijie Chen,

Haixi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Bin and Sun, Wei and Wang, Qianqian and Feng, Wei and Chen, Yijie and Zhang, Haixi}, title = {Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8664-8673} }
WorldReel: 4D Video Generation with Consistent Geometry and Motion Modeling: Shaoheng Fang,

Hanwen Jiang,

Yunpeng Bai,

Niloy J. Mitra,

Qixing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Shaoheng and Jiang, Hanwen and Bai, Yunpeng and Mitra, Niloy J. and Huang, Qixing}, title = {WorldReel: 4D Video Generation with Consistent Geometry and Motion Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11195-11206} }
Off The Grid: Detection of Primitives for Feed-Forward 3D Gaussian Splatting: Arthur Moreau,

Richard Shaw,

Michal Nazarczuk,

Jisu Shin,

Thomas Tanay,

Zhensong Zhang,

Songcen Xu,

Eduardo Pérez-Pellitero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moreau_2026_CVPR, author = {Moreau, Arthur and Shaw, Richard and Nazarczuk, Michal and Shin, Jisu and Tanay, Thomas and Zhang, Zhensong and Xu, Songcen and P{\'e}rez-Pellitero, Eduardo}, title = {Off The Grid: Detection of Primitives for Feed-Forward 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11756-11766} }
DMGD: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models: Qichao Wang,

Yunhong Lu,

Hengyuan Cao,

Junyi Zhang,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Qichao and Lu, Yunhong and Cao, Hengyuan and Zhang, Junyi and Zhang, Min}, title = {DMGD: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12417-12427} }
CamPI: Physical Adversarial Examples through Camera Power Signal Injection: Yanze Ren,

Mingyuan Lv,

Qinhong Jiang,

Yan Jiang,

Chen Yan,

Xiaoyu Ji,

Wenyuan Xu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2026_CVPR, author = {Ren, Yanze and Lv, Mingyuan and Jiang, Qinhong and Jiang, Yan and Yan, Chen and Ji, Xiaoyu and Xu, Wenyuan}, title = {CamPI: Physical Adversarial Examples through Camera Power Signal Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6611-6620} }
Learning Compact 3D Representations from Feed-Forward Novel View Synthesis: Honggyu An,

Jaewoo Jung,

Mungyeom Kim,

Chaehyun Kim,

Minkyeong Jeon,

Jisang Han,

Kazumi Fukuda,

Takuya Narihira,

Hyunah Ko,

Junsu Kim,

Sunghwan Hong,

Yuki Mitsufuji,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{An_2026_CVPR, author = {An, Honggyu and Jung, Jaewoo and Kim, Mungyeom and Kim, Chaehyun and Jeon, Minkyeong and Han, Jisang and Fukuda, Kazumi and Narihira, Takuya and Ko, Hyunah and Kim, Junsu and Hong, Sunghwan and Mitsufuji, Yuki and Kim, Seungryong}, title = {Learning Compact 3D Representations from Feed-Forward Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {363-373} }
ShowUI-p: Flow-based Generative Models as GUI Dexterous Hands: Siyuan Hu,

Kevin Qinghong Lin,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Siyuan and Lin, Kevin Qinghong and Shou, Mike Zheng}, title = {ShowUI-p: Flow-based Generative Models as GUI Dexterous Hands}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8130-8140} }
FontCrafter: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation: Wuyang Luo,

Chengkai Tan,

Chang Ge,

Binye Hong,

Su Yang,

Yongjiu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Wuyang and Tan, Chengkai and Ge, Chang and Hong, Binye and Yang, Su and Ma, Yongjiu}, title = {FontCrafter: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {583-593} }
SceneTok: A Compressed, Diffusable Token Space for 3D Scenes: Mohammad Asim,

Christopher Wewer,

Jan Eric Lenssen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asim_2026_CVPR, author = {Asim, Mohammad and Wewer, Christopher and Lenssen, Jan Eric}, title = {SceneTok: A Compressed, Diffusable Token Space for 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5870-5880} }
DRM: Diffusion-based Reward Model With Step-wise Guidance: Jaxon Zhang,

Binxin Yang,

Hubery Yin,

Chen Li,

Jing LYU; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jaxon and Yang, Binxin and Yin, Hubery and Li, Chen and LYU, Jing}, title = {DRM: Diffusion-based Reward Model With Step-wise Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12764-12774} }
TruckDrive: Long-Range Autonomous Highway Driving Dataset: Filippo Ghilotti,

Edoardo Palladin,

Samuel Brucker,

Adam Sigal,

Mario Bijelic,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghilotti_2026_CVPR, author = {Ghilotti, Filippo and Palladin, Edoardo and Brucker, Samuel and Sigal, Adam and Bijelic, Mario and Heide, Felix}, title = {TruckDrive: Long-Range Autonomous Highway Driving Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10587-10598} }
Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation: Xusheng Liang,

Lihua Zhou,

Nianxin Li,

Miao Xu,

Ziyang Song,

Dong Yi,

Jinlin Wu,

Jiawei Ma,

Hongbin Liu,

Zhen Lei,

Jiebo Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Xusheng and Zhou, Lihua and Li, Nianxin and Xu, Miao and Song, Ziyang and Yi, Dong and Wu, Jinlin and Ma, Jiawei and Liu, Hongbin and Lei, Zhen and Luo, Jiebo}, title = {Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13670-13679} }
GeoFlow: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction: Ayesh Abu Lehyeh,

Xiaohan Zhang,

Ahmad Arrabi,

Waqas Sultani,

Chen Chen,

Safwan Wshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abu_Lehyeh_2026_CVPR, author = {Abu Lehyeh, Ayesh and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan}, title = {GeoFlow: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5369-5378} }
RealBirdID: Benchmarking Bird Species Identification in the Era of MLLMs: Logan Lawrence,

Oindrila Saha,

Rangel Daroya,

Mustafa Chasmai,

Wuao Liu,

Max Hamilton,

Aaron Sun,

Seoyun Jeong,

Fabien Delattre,

Subhransu Maji,

Grant Van Horn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lawrence_2026_CVPR, author = {Lawrence, Logan and Saha, Oindrila and Daroya, Rangel and Chasmai, Mustafa and Liu, Wuao and Hamilton, Max and Sun, Aaron and Jeong, Seoyun and Delattre, Fabien and Maji, Subhransu and Van Horn, Grant}, title = {RealBirdID: Benchmarking Bird Species Identification in the Era of MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2445-2456} }
Diff4Splat: Repurposing Video Diffusion Models for Dynamic Scene Generation: Panwang Pan,

Chenguo Lin,

Chenxin Li,

Jingjing Zhao,

Yuchen Lin,

Haopeng Li,

Yunlong Lin,

Kairun Wen,

Yixuan Yuan,

Yadong MU; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Panwang and Lin, Chenguo and Li, Chenxin and Zhao, Jingjing and Lin, Yuchen and Li, Haopeng and Lin, Yunlong and Wen, Kairun and Yuan, Yixuan and MU, Yadong}, title = {Diff4Splat: Repurposing Video Diffusion Models for Dynamic Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4232-4244} }
X-WIN: Building Chest Radiograph World Model via Predictive Sensing: Zefan Yang,

Ge Wang,

James Hendler,

Mannudeep K. Kalra,

Pingkun Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zefan and Wang, Ge and Hendler, James and Kalra, Mannudeep K. and Yan, Pingkun}, title = {X-WIN: Building Chest Radiograph World Model via Predictive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6920-6930} }
OPRO: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation: Sanghyeon Lee,

Minwoo Lee,

Euijin Shin,

Kangyeol Kim,

Seunghwan Choi,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Sanghyeon and Lee, Minwoo and Shin, Euijin and Kim, Kangyeol and Choi, Seunghwan and Choo, Jaegul}, title = {OPRO: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9233-9242} }
Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport: Zheng Jiang,

Nan He,

Yiming Chen,

Lifeng Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zheng and He, Nan and Chen, Yiming and Sun, Lifeng}, title = {Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3369-3378} }
Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision: Amadou S. Sangare,

Adrien Maglo,

Mohamed Chaouch,

Bertrand Luvison; [pdf] [supp]
[bibtex]
@InProceedings{Sangare_2026_CVPR, author = {Sangare, Amadou S. and Maglo, Adrien and Chaouch, Mohamed and Luvison, Bertrand}, title = {Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9106-9115} }
AdaIAT: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in LVLM: Li'an Zhong,

Ziqiang He,

Jibin Zheng,

Jin Li,

Z. Jane Wang,

Xiangui Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Li'an and He, Ziqiang and Zheng, Jibin and Li, Jin and Wang, Z. Jane and Kang, Xiangui}, title = {AdaIAT: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in LVLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11076-11085} }
Visual-RRT: Finding Paths toward Visual-Goals via Differentiable Rendering: Sebin Lee,

Jumin Lee,

Taeyeon Kim,

Youngju Na,

Woobin Im,

Sung-Eui Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Sebin and Lee, Jumin and Kim, Taeyeon and Na, Youngju and Im, Woobin and Yoon, Sung-Eui}, title = {Visual-RRT: Finding Paths toward Visual-Goals via Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13486-13495} }
STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval: Miaoge Li,

Dongsheng Wang,

Zening Sun,

Jinsen Zhang,

Wenhan Luo,

Jingcai Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Miaoge and Wang, Dongsheng and Sun, Zening and Zhang, Jinsen and Luo, Wenhan and Guo, Jingcai}, title = {STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12374-12384} }
Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach: Yuanxiang Huangfu,

Chaochao Wang,

Weilei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huangfu_2026_CVPR, author = {Huangfu, Yuanxiang and Wang, Chaochao and Wang, Weilei}, title = {Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10142-10151} }
MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction: Han Jiao,

Jiakai Sun,

Yexing Xu,

Lei Zhao,

Wei Xing,

Huaizhong Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR, author = {Jiao, Han and Sun, Jiakai and Xu, Yexing and Zhao, Lei and Xing, Wei and Lin, Huaizhong}, title = {MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11767-11776} }
D-Prism: Differentiable Primitives for Structured Dynamic Modeling: Xingyuan Yu,

Yijin Li,

Chong Zeng,

Yuhang Ming,

Hujun Bao,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Xingyuan and Li, Yijin and Zeng, Chong and Ming, Yuhang and Bao, Hujun and Zhang, Guofeng}, title = {D-Prism: Differentiable Primitives for Structured Dynamic Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7556-7566} }
LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception: Simon de Moreau,

Andrei Bursuc,

Hafid El Idrissi,

Fabien Moutarde; [pdf] [supp]
[bibtex]
@InProceedings{de_Moreau_2026_CVPR, author = {de Moreau, Simon and Bursuc, Andrei and El Idrissi, Hafid and Moutarde, Fabien}, title = {LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14188-14197} }
AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention: Lei Xiao,

Jifeng Li,

Juntao Gao,

Feiyang Ye,

Yan Jin,

Jingjing Qian,

Jing Zhang,

Yong Wu,

Xiaoyuan Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Lei and Li, Jifeng and Gao, Juntao and Ye, Feiyang and Jin, Yan and Qian, Jingjing and Zhang, Jing and Wu, Yong and Yu, Xiaoyuan}, title = {AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13453-13463} }
EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses: Enrico Pallotta,

Sina Mokhtarzadeh Azar,

Lars Doorenbos,

Serdar Ozsoy,

Umar Iqbal,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pallotta_2026_CVPR, author = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Doorenbos, Lars and Ozsoy, Serdar and Iqbal, Umar and Gall, Juergen}, title = {EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4269-4279} }
Act2See: Emergent Active Visual Perception for Video Reasoning: Martin Q. Ma,

Yuxiao Qu,

Aditya Agrawal,

Willis Guo,

Paul Pu Liang,

Ruslan Salakhutdinov,

Louis-Philippe Morency; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Martin Q. and Qu, Yuxiao and Agrawal, Aditya and Guo, Willis and Liang, Paul Pu and Salakhutdinov, Ruslan and Morency, Louis-Philippe}, title = {Act2See: Emergent Active Visual Perception for Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5455-5464} }
RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection: Xiaokai Bai,

Chenxu Zhou,

Lianqing Zheng,

Jianan Liu,

Si-Yuan Cao,

Xiaohan Zhang,

Yiming Li,

Zhengzhuang Zhang,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR, author = {Bai, Xiaokai and Zhou, Chenxu and Zheng, Lianqing and Liu, Jianan and Cao, Si-Yuan and Zhang, Xiaohan and Li, Yiming and Zhang, Zhengzhuang and Shen, Hui-Liang}, title = {RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4983-4992} }
Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity: Yitian Chen,

Shigeng Zhang,

Xuan Liu,

Mingming Lu,

Kai Chen,

Hongye Zhu,

Xinning Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yitian and Zhang, Shigeng and Liu, Xuan and Lu, Mingming and Chen, Kai and Zhu, Hongye and Chen, Xinning}, title = {Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10789-10798} }
From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification: Li-Jun Zhao,

Zhen-Duo Chen,

Xin Luo,

Xin-Shun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Li-Jun and Chen, Zhen-Duo and Luo, Xin and Xu, Xin-Shun}, title = {From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12364-12373} }
High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy: Xianjie Liu,

Keren Fu,

Qijun Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Xianjie and Fu, Keren and Zhao, Qijun}, title = {High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6357-6366} }
Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling: Qi Sun,

Can Wang,

Jiaxiang Shang,

Yingchun Liu,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Qi and Wang, Can and Shang, Jiaxiang and Liu, Yingchun and Liao, Jing}, title = {Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12651-12662} }
A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images: Sungik Choi,

Hankook Lee,

Jaehoon Lee,

Robin Kim,

Stanley Jungkyu Choi,

Moontae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR, author = {Choi, Sungik and Lee, Hankook and Lee, Jaehoon and Kim, Robin and Choi, Stanley Jungkyu and Lee, Moontae}, title = {A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3254-3263} }
StyleTextGen: Style-Conditioned Multilingual Scene Text Generation: Zeyu Chen,

Fangmin Zhao,

Yan Shu,

Yichao Liu,

Liu Yu,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Zeyu and Zhao, Fangmin and Shu, Yan and Liu, Yichao and Yu, Liu and Zhou, Yu}, title = {StyleTextGen: Style-Conditioned Multilingual Scene Text Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7643-7653} }
Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance: Naifu Xue,

Zhaoyang Jia,

Jiahao Li,

Bin Li,

Zihan Zheng,

Yuan Zhang,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR, author = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zheng, Zihan and Zhang, Yuan and Lu, Yan}, title = {Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9752-9761} }
Neural Collapse in Test-Time Adaptation: Xiao Chen,

Zhongjing Du,

Jiazhen Huang,

Xu Jiang,

Li Lu,

Jingyan Jiang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Xiao and Du, Zhongjing and Huang, Jiazhen and Jiang, Xu and Lu, Li and Jiang, Jingyan and Wang, Zhi}, title = {Neural Collapse in Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10567-10576} }
ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation: Ayush Roy,

Wei-Yang Alex Lee,

Rudrasis Chakraborty,

Vishnu Suresh Lokhande; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roy_2026_CVPR, author = {Roy, Ayush and Lee, Wei-Yang Alex and Chakraborty, Rudrasis and Lokhande, Vishnu Suresh}, title = {ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12406-12416} }
Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning: Guanjie Chen,

Shirui Huang,

Yifu Sun,

Kai Liu,

Jianchen Zhu,

Xiaoye Qu,

Yu Cheng,

Peng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Guanjie and Huang, Shirui and Sun, Yifu and Liu, Kai and Zhu, Jianchen and Qu, Xiaoye and Cheng, Yu and Chen, Peng}, title = {Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6010-6020} }
MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model: Shiyu Qin,

Xinjie Zhang,

Zhening Liu,

Jinpeng Wang,

Bin Chen,

Jiawei Li,

Yifan Ren,

Shu-Tao Xia,

Jun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR, author = {Qin, Shiyu and Zhang, Xinjie and Liu, Zhening and Wang, Jinpeng and Chen, Bin and Li, Jiawei and Ren, Yifan and Xia, Shu-Tao and Zhang, Jun}, title = {MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5306-5315} }
ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain: Yingzhao Li,

Yanjie Liu,

Lijun Zhao; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yingzhao and Liu, Yanjie and Zhao, Lijun}, title = {ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5003-5013} }
Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models: Keuntae Kim,

Mingyu Kang,

Yong Suk Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Keuntae and Kang, Mingyu and Choi, Yong Suk}, title = {Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5154-5164} }
Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models: Zixuan Ye,

Quande Liu,

Cong Wei,

Yuanxing Zhang,

Xintao Wang,

Pengfei Wan,

Kun Gai,

Wenhan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Zixuan and Liu, Quande and Wei, Cong and Zhang, Yuanxing and Wang, Xintao and Wan, Pengfei and Gai, Kun and Luo, Wenhan}, title = {Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9116-9126} }
Adaptive Learned Image Compression with Graph Neural Networks: Yunuo Chen,

Bing He,

Zezheng Lyu,

Hongwei Hu,

Qunshan Gu,

Yuan Tian,

Guo Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yunuo and He, Bing and Lyu, Zezheng and Hu, Hongwei and Gu, Qunshan and Tian, Yuan and Lu, Guo}, title = {Adaptive Learned Image Compression with Graph Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12150-12161} }
ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction: Yuheng Zhang,

Mengfei Duan,

Kunyu Peng,

Yuhang Wang,

Di Wen,

Danda Pani Paudel,

Luc Van Gool,

Kailun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuheng and Duan, Mengfei and Peng, Kunyu and Wang, Yuhang and Wen, Di and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun}, title = {ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14241-14252} }
ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models: Hai Jiang,

Zhen Liu,

Yinjie Lei,

Songchen Han,

Bing Zeng,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Hai and Liu, Zhen and Lei, Yinjie and Han, Songchen and Zeng, Bing and Liu, Shuaicheng}, title = {ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1320-1330} }
HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images: Yichen Liu,

Donghao Zhou,

Jie Wang,

Xin Gao,

Guisheng Liu,

Jiatong Li,

Quanwei Zhang,

Qiang Lyu,

Lanqing Guo,

Shilei Wen,

Weiqiang Wang,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yichen and Zhou, Donghao and Wang, Jie and Gao, Xin and Liu, Guisheng and Li, Jiatong and Zhang, Quanwei and Lyu, Qiang and Guo, Lanqing and Wen, Shilei and Wang, Weiqiang and Heng, Pheng-Ann}, title = {HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1994-2004} }
ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering: Denis Lukovnikov,

Andreas Müller,

Erwin Quiring,

Asja Fischer; [pdf] [supp]
[bibtex]
@InProceedings{Lukovnikov_2026_CVPR, author = {Lukovnikov, Denis and M{\"u}ller, Andreas and Quiring, Erwin and Fischer, Asja}, title = {ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9213-9222} }
The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts: Yuchen Zhang,

Yaxiong Wang,

Yujiao Wu,

Lianwei Wu,

Li Zhu,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuchen and Wang, Yaxiong and Wu, Yujiao and Wu, Lianwei and Zhu, Li and Zheng, Zhedong}, title = {The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8760-8769} }
Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion: Zengyi Yang,

Yu Liu,

Juan Cheng,

Zhiqin Zhu,

Yafei Zhang,

Huafeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zengyi and Liu, Yu and Cheng, Juan and Zhu, Zhiqin and Zhang, Yafei and Li, Huafeng}, title = {Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {188-198} }
FUSER: Feed-Forward Multiview 3D Registration Transformer and SE(3)$^N$ Diffusion Refinement: Haobo Jiang,

Jin Xie,

Jian Yang,

Liang Yu,

Jianmin Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin}, title = {FUSER: Feed-Forward Multiview 3D Registration Transformer and {SE(3)$^N$} Diffusion Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7393-7403} }
PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning: Xinxing Yu,

Ajian Liu,

Sunyuan Qiang,

Hui Ma,

Liying Yang,

Yuzhong Wang,

Zhi Rao,

Yanyan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Xinxing and Liu, Ajian and Qiang, Sunyuan and Ma, Hui and Yang, Liying and Wang, Yuzhong and Rao, Zhi and Liang, Yanyan}, title = {PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10016-10026} }
Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models: Dailan He,

Guanlin Feng,

Xingtong Ge,

Yazhe Niu,

Yi Zhang,

Bingqi Ma,

Guanglu Song,

Yu Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Dailan and Feng, Guanlin and Ge, Xingtong and Niu, Yazhe and Zhang, Yi and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng},
  title     = {Neighbor {GRPO}: Contrastive {ODE} Policy Optimization Aligns Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6033--6042},
}
CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization: Weilin Chen,

Jiahao Rao,

Wenhao Wang,

Xinyang Li,

Xuan Cheng,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Weilin and Rao, Jiahao and Wang, Wenhao and Li, Xinyang and Cheng, Xuan and Cao, Liujuan},
  title     = {{CustomTex}: High-fidelity Indoor Scene Texturing via Multi-Reference Customization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4280--4290},
}
SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping: Hongyu Wen,

Jia Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Hongyu and Deng, Jia},
  title     = {{SeeGroup}: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7299--7309},
}
CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection: Zhipeng Liu,

Chunbo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhipeng and Luo, Chunbo},
  title     = {{CrossVL}: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10116--10125},
}
GFRRN: Explore the Gaps in Single Image Reflection Removal: Yu Chen,

Zewei He,

Xingyu Liu,

Zixuan Chen,

Zhe-Ming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yu and He, Zewei and Liu, Xingyu and Chen, Zixuan and Lu, Zhe-Ming},
  title     = {{GFRRN}: Explore the Gaps in Single Image Reflection Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5690--5699},
}
MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second: Chenguo Lin,

Yuchen Lin,

Panwang Pan,

Yifan Yu,

Tao Hu,

Honglei Yan,

Katerina Fragkiadaki,

Yadong Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Chenguo and Lin, Yuchen and Pan, Panwang and Yu, Yifan and Hu, Tao and Yan, Honglei and Fragkiadaki, Katerina and Mu, Yadong},
  title     = {{MoVieS}: Motion-Aware {4D} Dynamic View Synthesis in One Second},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {295--306},
}
Affostruction: 3D Affordance Grounding with Generative Reconstruction: Chunghyun Park,

Seunghyeon Lee,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Chunghyun and Lee, Seunghyeon and Cho, Minsu},
  title     = {Affostruction: {3D} Affordance Grounding with Generative Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7435--7445},
}
Is Parameter Isolation Better for Prompt-Based Continual Learning?: Jiangyang Li,

Chenhao Ding,

SongLin Dong,

Qiang Wang,

Jianchao Zhao,

Yuhang He,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiangyang and Ding, Chenhao and Dong, SongLin and Wang, Qiang and Zhao, Jianchao and He, Yuhang and Gong, Yihong},
  title     = {Is Parameter Isolation Better for Prompt-Based Continual Learning?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3887--3897},
}
MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis: Di Luo,

Shuhui Yang,

Mingxin Yang,

Jiawei Lu,

Yixuan Tang,

Xintong Han,

Zhuo Chen,

Beibei Wang,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Di and Yang, Shuhui and Yang, Mingxin and Lu, Jiawei and Tang, Yixuan and Han, Xintong and Chen, Zhuo and Wang, Beibei and Guo, Chunchao},
  title     = {{MatPedia}: A Universal Generative Foundation for High-Fidelity Material Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8943--8953},
}
Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models: Boyang Guo,

Liang Li,

Lin Peng,

Yuhan Gao,

Xichun Sheng,

Chenggang Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Boyang and Li, Liang and Peng, Lin and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang},
  title     = {Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3122--3132},
}
Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction: Yue Ma,

Frederick W. B. Li,

Xiaohui Liang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yue and Li, Frederick W. B. and Liang, Xiaohui},
  title     = {Gaussian-Mixture Latent Flow for Stochastic {3D} Human Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7131--7141},
}
MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation: Chao Ning,

Minghe Shen,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Chao and Shen, Minghe and Yokoya, Naoto},
  title     = {{MD2E}: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5772--5782},
}
GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension: Fang Liu,

Yuhao Liu,

Ke Xu,

Gerhard Petrus Hancke,

Rynson W. H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Fang and Liu, Yuhao and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {{GenSplat}: Bridging the Generalization Gap in {3DGS} Language Comprehension},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5221--5231},
}
PhyGaP: Physically-Grounded Gaussians with Polarization Cues: Jiale Wu,

Xiaoyang Bai,

Zongqi He,

Weiwei Xu,

Yifan Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jiale and Bai, Xiaoyang and He, Zongqi and Xu, Weiwei and Peng, Yifan},
  title     = {{PhyGaP}: Physically-Grounded {Gaussians} with Polarization Cues},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7278--7288},
}
Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass: Liyi Chen,

Pengfei Wang,

Guowen Zhang,

Zhiyuan Ma,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Liyi and Wang, Pengfei and Zhang, Guowen and Ma, Zhiyuan and Zhang, Lei},
  title     = {{Omni-3DEdit}: Generalized Versatile {3D} Editing in One-Pass},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12640--12650},
}
UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision: Alberto Rota,

Mert Kiray,

Mert Asim Karaoglu,

Patrick Ruhkamp,

Elena De Momi,

Nassir Navab,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rota_2026_CVPR,
  author    = {Rota, Alberto and Kiray, Mert and Karaoglu, Mert Asim and Ruhkamp, Patrick and De Momi, Elena and Navab, Nassir and Busam, Benjamin},
  title     = {{UnReflectAnything}: {RGB}-Only Highlight Removal by Rendering Synthetic Specular Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {241--250},
}
FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation: Xingyu Wang,

Tao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xingyu and Wang, Tao},
  title     = {{FOZO}: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7936--7945},
}
AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers: Nghia Vu,

Tuong Do,

Khang Nguyen,

Baoru Huang,

Nhat Le,

Binh Xuan Nguyen,

Erman Tjiputra,

Quang D. Tran,

Ravi Prakash,

Te-Chuan Chiu,

Anh Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vu_2026_CVPR,
  author    = {Vu, Nghia and Do, Tuong and Nguyen, Khang and Huang, Baoru and Le, Nhat and Nguyen, Binh Xuan and Tjiputra, Erman and Tran, Quang D. and Prakash, Ravi and Chiu, Te-Chuan and Nguyen, Anh},
  title     = {{AffordMatcher}: Affordance Learning in {3D} Scenes from Visual Signifiers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2635--2644},
}
Language-Grounded Decoupled Action Representation for Robotic Manipulation: Wuding Weng,

Tongshu Wu,

Liucheng Chen,

Siyu Xie,

Zheng Wang,

Xing Xu,

Jingkuan Song,

Heng Tao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2026_CVPR,
  author    = {Weng, Wuding and Wu, Tongshu and Chen, Liucheng and Xie, Siyu and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao},
  title     = {Language-Grounded Decoupled Action Representation for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6770--6780},
}
Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction: Guangpu Yang,

Steffen Kieß,

Hanxiang Luo,

Xingyu Liu,

Sven Simon; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Guangpu and Kie{\ss}, Steffen and Luo, Hanxiang and Liu, Xingyu and Simon, Sven},
  title     = {{Exact-GS}: Mathematically Rigorous and Accurate {3D} {Gaussian} Splatting for {3D} {X-ray} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4902--4911},
}
SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction: Yijian Tian,

Mingtao Ou,

Zijian Pan,

Xinglong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Yijian and Ou, Mingtao and Pan, Zijian and Ji, Xinglong},
  title     = {{SDGS}: Spatial Difference Guided {Gaussian} Splatting for Simultaneous Localization and {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4860--4869},
}
ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts: Haoyang Cui,

Hao Jiang,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Haoyang and Jiang, Hao and Mu, Yadong},
  title     = {{ShreddingNet}: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8247--8256},
}
RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection: Jihwan Park,

Chanhyeong Yang,

Jinyoung Park,

Taehoon Song,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jihwan and Yang, Chanhyeong and Park, Jinyoung and Song, Taehoon and Kim, Hyunwoo J.},
  title     = {{RegFormer}: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10387--10396},
}
MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting: Haoran Zhou,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Haoran and Lee, Gim Hee},
  title     = {{MotionScale}: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11860--11870},
}
Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance: Vanessa Emanuela Guarino,

Claudia Winklmayr,

Jannik Franzen,

Josef Lorenz Rumberger,

Manuel Pfeuffer,

Sonja Greven,

Klaus Maier-Hein,

Dagmar Kainmueller,

Christoph Karg,

Carsten T. Lüth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guarino_2026_CVPR,
  author    = {Guarino, Vanessa Emanuela and Winklmayr, Claudia and Franzen, Jannik and Rumberger, Josef Lorenz and Pfeuffer, Manuel and Greven, Sonja and Maier-Hein, Klaus and Kainmueller, Dagmar and Karg, Christoph and L{\"u}th, Carsten T.},
  title     = {Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13145--13156},
}
Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction: Mukhiddin Toshpulatov,

Wookey Lee,

Suan Lee,

Geehyuk Lee; [pdf] [supp]
[bibtex]
@InProceedings{Toshpulatov_2026_CVPR,
  author    = {Toshpulatov, Mukhiddin and Lee, Wookey and Lee, Suan and Lee, Geehyuk},
  title     = {Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1819--1828},
}
MMGait: Towards Multi-Modal Gait Recognition: Chenye Wang,

Qingyuan Cai,

Saihui Hou,

Aoqi Li,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenye and Cai, Qingyuan and Hou, Saihui and Li, Aoqi and Huang, Yongzhen},
  title     = {{MMGait}: Towards Multi-Modal Gait Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1726--1736},
}
Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance: Huakeng Ding,

Yaowen Chen,

Kun Zhou,

Hongzhi Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Huakeng and Chen, Yaowen and Zhou, Kun and Wu, Hongzhi},
  title     = {Differentiable Adaptive {4D} Structured Illumination for Joint Capture of Shape and Reflectance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12448--12457},
}
Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices: Charantej Pochimireddy,

Subhasmita Sahoo,

Apoorva Verma,

Palavalli Shyam,

Swapnil Malviya,

Sarvesh Sarvesh,

Raj Gadde; [pdf] [supp]
[bibtex]
@InProceedings{Pochimireddy_2026_CVPR,
  author    = {Pochimireddy, Charantej and Sahoo, Subhasmita and Verma, Apoorva and Shyam, Palavalli and Malviya, Swapnil and Sarvesh, Sarvesh and Gadde, Raj},
  title     = {Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra {HD} Video on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1376--1385},
}
OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models: Xingkui Zhu,

Dingkang Liang,

Cheng Chen,

Daoxin Zhang,

Lv Hanxiang,

Zhe Xu,

Yao Hu,

Xiang Bai; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xingkui and Liang, Dingkang and Chen, Cheng and Zhang, Daoxin and Lv, Hanxiang and Xu, Zhe and Hu, Yao and Bai, Xiang},
  title     = {{OneSparse}: A Unified Framework for Sparse Activation Layers in Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12084--12094},
}
Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features: Junbo Ke,

Yangyang Xu,

Chao Wang,

You-Wei Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Junbo and Xu, Yangyang and Wang, Chao and Wen, You-Wei},
  title     = {Content-Aware Frequency Encoding for Implicit Neural Representations with {Fourier}-{Chebyshev} Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3646--3655},
}
PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing: Rohan Mahadev,

Joyce Yuan,

Patrick Poirson,

David Xue,

Hao-Yu Wu,

Dmitry Kislyuk; [pdf] [arXiv]
[bibtex]
@InProceedings{Mahadev_2026_CVPR,
  author    = {Mahadev, Rohan and Yuan, Joyce and Poirson, Patrick and Xue, David and Wu, Hao-Yu and Kislyuk, Dmitry},
  title     = {{PinPoint}: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9742--9751},
}
Physical Object Understanding with a Physically Controllable World Model: Rahul Venkatesh,

Klemen Kotar,

Lilian Naing Chen,

Wanhee Lee,

Gia Ancone,

Seungwoo Kim,

Luca Thomas Wheeler,

Jared Watrous,

Honglin Chen,

Daniel Bear,

Stefan Stojanov,

Daniel LK Yamins; [pdf] [supp]
[bibtex]
@InProceedings{Venkatesh_2026_CVPR,
  author    = {Venkatesh, Rahul and Kotar, Klemen and Chen, Lilian Naing and Lee, Wanhee and Ancone, Gia and Kim, Seungwoo and Wheeler, Luca Thomas and Watrous, Jared and Chen, Honglin and Bear, Daniel and Stojanov, Stefan and Yamins, Daniel L. K.},
  title     = {Physical Object Understanding with a Physically Controllable World Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2593--2602},
}
Learning Effective Sign Features without Text for Gloss-free Sign Language Translation: Shiwei Gan,

Xiao Liu,

Yafeng Yin,

Nan Liu,

Kuizhuang Liu,

Desibieer Tuerdaken,

Zhiwei Jiang,

Lei Xie,

Sanglu Lu,

Hongkai Wen; [pdf] [supp]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Shiwei and Liu, Xiao and Yin, Yafeng and Liu, Nan and Liu, Kuizhuang and Tuerdaken, Desibieer and Jiang, Zhiwei and Xie, Lei and Lu, Sanglu and Wen, Hongkai},
  title     = {Learning Effective Sign Features without Text for Gloss-free Sign Language Translation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9827--9836},
}
Towards Foundation Models for 3D Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds: Bin Yang,

Mohamed Abdelsamad,

Miao Zhang,

Alexandru Paul Condurache; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Bin and Abdelsamad, Mohamed and Zhang, Miao and Condurache, Alexandru Paul},
  title     = {Towards Foundation Models for {3D} Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2938--2947},
}
Building a Precise Video Language with Human-AI Oversight: Zhiqiu Lin,

Siyuan Cen,

Chancharik Mitra,

Isaac Li,

Yuhan Huang,

Yu Tong Tiffany Ling,

Hewei Wang,

Irene Pi,

Shihang Zhu,

Yili Han,

Yilun Du,

Deva Ramanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zhiqiu and Cen, Siyuan and Mitra, Chancharik and Li, Isaac and Huang, Yuhan and Ling, Yu Tong Tiffany and Wang, Hewei and Pi, Irene and Zhu, Shihang and Han, Yili and Du, Yilun and Ramanan, Deva},
  title     = {Building a Precise Video Language with Human-{AI} Oversight},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11334--11345},
}
Clair Obscur: an Illumination-Aware Method for Real-World Image Vectorization: Xingyue Lin,

Shuai Peng,

Xiangyu Xie,

Jianhua Zhu,

Yuxuan Zhou,

Liangcai Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xingyue and Peng, Shuai and Xie, Xiangyu and Zhu, Jianhua and Zhou, Yuxuan and Gao, Liangcai},
  title     = {Clair {Obscur}: an Illumination-Aware Method for Real-World Image Vectorization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9161--9170},
}
Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion: Yanglin Deng,

Tianyang Xu,

Chunyang Cheng,

Hui Li,

Xiaojun Wu,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Yanglin and Xu, Tianyang and Cheng, Chunyang and Li, Hui and Wu, Xiaojun and Kittler, Josef},
  title     = {Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12334--12343},
}
PhotoFramer: Multi-modal Image Composition Instruction: Zhiyuan You,

Ke Wang,

He Zhang,

Xin Cai,

Jinjin Gu,

Tianfan Xue,

Chao Dong,

Zhoutong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Zhiyuan and Wang, Ke and Zhang, He and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao and Zhang, Zhoutong},
  title     = {{PhotoFramer}: Multi-modal Image Composition Instruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10197--10207},
}
Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models: Yexing Xu,

Wei Feng,

Shen Zhang,

Haohan Wang,

Yuxin Qin,

Yaoyu Li,

Ao Ma,

Yuhao Luo,

Lu Wang,

Xudong Ren,

Haoran Wang,

Run Ling,

Zheng Zhang,

Jingjing Lv,

Junjie Shen,

Ching Law,

Longguang Wang,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yexing and Feng, Wei and Zhang, Shen and Wang, Haohan and Qin, Yuxin and Li, Yaoyu and Ma, Ao and Luo, Yuhao and Wang, Lu and Ren, Xudong and Wang, Haoran and Ling, Run and Zhang, Zheng and Lv, Jingjing and Shen, Junjie and Law, Ching and Wang, Longguang and Guo, Yulan},
  title     = {Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {472--483},
}
Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?: Zhi Zhu,

YaoQi Fan,

Zhe Chen,

Yue Cao,

Yangzhou Liu,

Tong Lu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhi and Fan, YaoQi and Chen, Zhe and Cao, Yue and Liu, Yangzhou and Lu, Tong},
  title     = {Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11943--11953},
}
Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy: Yinghao Chen,

Yeying Jin,

Xiang Chen,

Yanyan Wei,

Ziyang Yan,

Yaowen Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yinghao and Jin, Yeying and Chen, Xiang and Wei, Yanyan and Yan, Ziyang and Fu, Yaowen},
  title     = {Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1342--1354},
}
Models as Lego Builders: Assembling Malice from Benign Blocks via Semantic Blueprints: Chenxi Li,

Xianggan Liu,

Dake Shen,

Yaosong Du,

Zhibo Yao,

Hao Jiang,

Linyi Jiang,

Chengwei Cao,

Jingzhe Zhang,

RanYi Peng,

Peiling Bai,

Xiande Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chenxi and Liu, Xianggan and Shen, Dake and Du, Yaosong and Yao, Zhibo and Jiang, Hao and Jiang, Linyi and Cao, Chengwei and Zhang, Jingzhe and Peng, RanYi and Bai, Peiling and Huang, Xiande},
  title     = {Models as {Lego} Builders: Assembling Malice from Benign Blocks via Semantic Blueprints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1533--1542},
}
GroundVTS: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding: Rong Fan,

Kaiyan Xiao,

Minghao Zhu,

Liuyi Wang,

Kai Dai,

Zhao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Rong and Xiao, Kaiyan and Zhu, Minghao and Wang, Liuyi and Dai, Kai and Yang, Zhao},
  title     = {{GroundVTS}: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10408--10418},
}
The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with LLaVA: Bingfeng Zhang,

Siyue Yu,

Hui Li,

Jiahua Lin,

Wenwu Wang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bingfeng and Yu, Siyue and Li, Hui and Lin, Jiahua and Wang, Wenwu and Xiao, Jimin},
  title     = {The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with {LLaVA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6336--6345},
}
Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision: Hyunsoo Cha,

Wonjung Woo,

Byungjun Kim,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2026_CVPR,
  author    = {Cha, Hyunsoo and Woo, Wonjung and Kim, Byungjun and Joo, Hanbyul},
  title     = {Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3988--3997},
}
Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance: Xiaoyu Kong,

Ketong Ren,

Dongyu She,

Weiming Dong,

Miao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Xiaoyu and Ren, Ketong and She, Dongyu and Dong, Weiming and Wang, Miao},
  title     = {Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8257--8266},
}
HG-Lane: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation: Daichao Zhao,

Qiupu Chen,

Feng He,

Xin Ning,

Qiankun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Daichao and Chen, Qiupu and He, Feng and Ning, Xin and Li, Qiankun},
  title     = {{HG-Lane}: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8439--8448},
}
ParticleGS: Learning Neural Gaussian Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation: Jinsheng Quan,

Qiaowei Miao,

Yichao Xu,

Zizhuo Lin,

Ying Li,

Wei Yang,

Zhihui Li,

Yawei Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quan_2026_CVPR,
  author    = {Quan, Jinsheng and Miao, Qiaowei and Xu, Yichao and Lin, Zizhuo and Li, Ying and Yang, Wei and Li, Zhihui and Luo, Yawei},
  title     = {{ParticleGS}: Learning Neural {Gaussian} Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8331--8341},
}
CoRoGS: Contextual Gaussian Splatting for Robust Large-Deviation View Synthesis: Xin Ma,

Peng Lu,

Yisong Chen,

Chengwei Pan,

Sheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xin and Lu, Peng and Chen, Yisong and Pan, Chengwei and Li, Sheng},
  title     = {{CoRoGS}: Contextual {Gaussian} Splatting for Robust Large-Deviation View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8288--8297},
}
ReAG: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering: Alberto Compagnoni,

Marco Morini,

Sara Sarto,

Federico Cocchi,

Davide Caffagni,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Compagnoni_2026_CVPR,
  author    = {Compagnoni, Alberto and Morini, Marco and Sarto, Sara and Cocchi, Federico and Caffagni, Davide and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {{ReAG}: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11901--11911},
}
LiDeRe: A Lightweight Readout for Fast and Data-Efficient Dense Prediction: Timo Lüddecke,

Jan Frederik Meier,

Jan van Delden,

Alexander Ecker; [pdf] [supp]
[bibtex]
@InProceedings{Luddecke_2026_CVPR,
  author    = {L{\"u}ddecke, Timo and Meier, Jan Frederik and van Delden, Jan and Ecker, Alexander},
  title     = {{LiDeRe}: A Lightweight Readout for Fast and Data-Efficient Dense Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2959--2971},
}
DFD-HR: Generalizable Deepfake Detection via Hierarchical Routing Learning: Jiamu Sun,

Zhiyuan Yan,

Ke-Yue Zhang,

Taiping Yao,

Shouhong Ding; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jiamu and Yan, Zhiyuan and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong},
  title     = {{DFD-HR}: Generalizable Deepfake Detection via Hierarchical Routing Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13984--13995},
}
Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation: Jiangning Zhang,

Junwei Zhu,

Zhenye Gan,

Donghao Luo,

Chuming Lin,

FeiFan Xu,

Xu Peng,

Jianlong Hu,

Yuansen Liu,

Yijia Hong,

Weijian Cao,

Han Feng,

Xu Chen,

Chencan Fu,

Keke He,

Xiaobin Hu,

Chengjie Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiangning and Zhu, Junwei and Gan, Zhenye and Luo, Donghao and Lin, Chuming and Xu, FeiFan and Peng, Xu and Hu, Jianlong and Liu, Yuansen and Hong, Yijia and Cao, Weijian and Feng, Han and Chen, Xu and Fu, Chencan and He, Keke and Hu, Xiaobin and Wang, Chengjie}, title = {Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3953-3964} }
Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets: Yeshwanth Kumar Adimoolam,

Charalambos Poullis,

Melinos Averkiou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Adimoolam_2026_CVPR, author = {Adimoolam, Yeshwanth Kumar and Poullis, Charalambos and Averkiou, Melinos}, title = {Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {72-81} }
ConsistCompose: Unified Multimodal Layout Control for Image Composition: Xuanke Shi,

Boxuan Li,

Xiaoyang Han,

Zhongang Cai,

Lei Yang,

Quan Wang,

Dahua Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Xuanke and Li, Boxuan and Han, Xiaoyang and Cai, Zhongang and Yang, Lei and Wang, Quan and Lin, Dahua}, title = {ConsistCompose: Unified Multimodal Layout Control for Image Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {495-505} }
iSplat: Iterative Learning for Fine-Grained Gaussian Splatting: Haifeng Wu,

Wei Long,

Shuhang Gu,

Lixin Duan,

Wen Li; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Haifeng and Long, Wei and Gu, Shuhang and Duan, Lixin and Li, Wen}, title = {iSplat: Iterative Learning for Fine-Grained Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11746-11755} }
Gyro-based Deep Video Deblurring: Jaesung Rim,

Woohyeok Kim,

Haeyun Lee,

Heemin Yang,

Ke Wang,

Sunghyun Cho; [pdf] [supp]
[bibtex]
@InProceedings{Rim_2026_CVPR, author = {Rim, Jaesung and Kim, Woohyeok and Lee, Haeyun and Yang, Heemin and Wang, Ke and Cho, Sunghyun}, title = {Gyro-based Deep Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8364-8374} }
PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow: Xincheng Shuai,

Song Tang,

Yutong Huang,

Henghui Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shuai_2026_CVPR, author = {Shuai, Xincheng and Tang, Song and Huang, Yutong and Ding, Henghui and Tao, Dacheng}, title = {PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10165-10175} }
TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies: Guang Liang,

Jie Shao,

Ningyuan Tang,

Xinyao Liu,

Jianxin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Guang and Shao, Jie and Tang, Ningyuan and Liu, Xinyao and Wu, Jianxin}, title = {TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6095-6105} }
HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models: MD Khalequzzaman Chowdhury Sayem,

Mubarrat Tajoar Chowdhury,

Yihalem Yimolal Tiruneh,

Muneeb A. Khan,

Muhammad Salman Ali,

Binod Bhattarai,

Seungryul Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sayem_2026_CVPR, author = {Sayem, MD Khalequzzaman Chowdhury and Chowdhury, Mubarrat Tajoar and Tiruneh, Yihalem Yimolal and Khan, Muneeb A. and Ali, Muhammad Salman and Bhattarai, Binod and Baek, Seungryul}, title = {HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2515-2525} }
FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment: Myunsoo Kim,

Seongwoong Shim,

Byung-Jun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Myunsoo and Shim, Seongwoong and Lee, Byung-Jun}, title = {FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {701-711} }
MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation: Bharath Krishnamurthy,

Ajita Rattani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krishnamurthy_2026_CVPR, author = {Krishnamurthy, Bharath and Rattani, Ajita}, title = {MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4580-4589} }
3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation: Zhixue Fang,

Xu He,

Songlin Tang,

Haoxian Zhang,

Qingfeng Li,

Xiaoqiang Liu,

Pengfei Wan,

Kun Gai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Zhixue and He, Xu and Tang, Songlin and Zhang, Haoxian and Li, Qingfeng and Liu, Xiaoqiang and Wan, Pengfei and Gai, Kun}, title = {3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2243-2252} }
Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs: Yurun Chen,

Xueyu Hu,

Yuhan Liu,

Ziqi Wang,

Zeyi Liao,

Lin Chen,

Feng Wei,

Yuxi Qian,

Bo Zheng,

Keting Yin,

Shengyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yurun and Hu, Xueyu and Liu, Yuhan and Wang, Ziqi and Liao, Zeyi and Chen, Lin and Wei, Feng and Qian, Yuxi and Zheng, Bo and Yin, Keting and Zhang, Shengyu}, title = {Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {735-744} }
VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image: Haokun Gui,

Senqiao Yang,

Mingkang Zhu,

Meng Chu,

Sitong Wu,

Changsheng Lu,

Zihao Wang,

Zhuotao Tian,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Gui_2026_CVPR, author = {Gui, Haokun and Yang, Senqiao and Zhu, Mingkang and Chu, Meng and Wu, Sitong and Lu, Changsheng and Wang, Zihao and Tian, Zhuotao and Jia, Jiaya}, title = {VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5187-5198} }
EgoX: Egocentric Video Generation from a Single Exocentric Video: Taewoong Kang,

Kinam Kim,

Dohyeon Kim,

Minho Park,

Junha Hyung,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR, author = {Kang, Taewoong and Kim, Kinam and Kim, Dohyeon and Park, Minho and Hyung, Junha and Choo, Jaegul}, title = {EgoX: Egocentric Video Generation from a Single Exocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11116-11126} }
The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments: Ofir Itzhak Shahar,

Gur Elkin,

Ohad Ben-Shahar; [pdf] [supp]
[bibtex]
@InProceedings{Shahar_2026_CVPR, author = {Shahar, Ofir Itzhak and Elkin, Gur and Ben-Shahar, Ohad}, title = {The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3186-3196} }
COT-FM: Cluster-wise Optimal Transport Flow Matching: Chiensheng Chiang,

Kuan-Hsun Tu,

Jia-Wei Liao,

Cheng-Fu Chou,

Tsung-Wei Ke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chiang_2026_CVPR, author = {Chiang, Chiensheng and Tu, Kuan-Hsun and Liao, Jia-Wei and Chou, Cheng-Fu and Ke, Tsung-Wei}, title = {COT-FM: Cluster-wise Optimal Transport Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11515-11524} }
KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization: Mengxin Zhang,

Yulin Wang,

Chen Luo,

Yongzhe Li,

Yijun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mengxin and Wang, Yulin and Luo, Chen and Li, Yongzhe and Zhou, Yijun}, title = {KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13866-13875} }
LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising: Longzhao Guo,

Shuo Zhang,

Chen Gao,

Qian Tian,

Youfang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Longzhao and Zhang, Shuo and Gao, Chen and Tian, Qian and Lin, Youfang}, title = {LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1355-1364} }
Test-time Sparsity for Extreme Fast Action Diffusion: Kangye Ji,

Yuan Meng,

Jianbo Zhou,

Ye Li,

Chen Tang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Kangye and Meng, Yuan and Zhou, Jianbo and Li, Ye and Tang, Chen and Wang, Zhi}, title = {Test-time Sparsity for Extreme Fast Action Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9414-9423} }
Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection: Xiang Li,

Zhangchi Hu,

Xu Xiao,

Bin Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xiang and Hu, Zhangchi and Xiao, Xu and Kong, Bin}, title = {Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11589-11598} }
The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection: Qingdong He,

Xueqin Chen,

Yanjie Pan,

Peng Tang,

Pengcheng Xu,

Zhenye Gan,

Chengjie Wang,

Xiaobin Hu,

Jiangning Zhang,

Yabiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Qingdong and Chen, Xueqin and Pan, Yanjie and Tang, Peng and Xu, Pengcheng and Gan, Zhenye and Wang, Chengjie and Hu, Xiaobin and Zhang, Jiangning and Wang, Yabiao}, title = {The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9182-9191} }
ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes: Zhongtao Wang,

Jiaqi Dai,

Qingtian Zhu,

Yilong Li,

Mai Su,

Fei Zhu,

Meng Gai,

Shaorong Wang,

Chengwei Pan,

Yisong Chen,

Guoping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Zhongtao and Dai, Jiaqi and Zhu, Qingtian and Li, Yilong and Su, Mai and Zhu, Fei and Gai, Meng and Wang, Shaorong and Pan, Chengwei and Chen, Yisong and Wang, Guoping}, title = {ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8298-8307} }
Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation: Shuo Wang,

Yucheng Wang,

Guoxin Lian,

Yongcai Wang,

Maiyue Chen,

Kaihui Wang,

Bo Zhang,

Zhizhong Su,

Yutian Zhou,

Wanting Li,

Deying Li,

Zhaoxin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Shuo and Wang, Yucheng and Lian, Guoxin and Wang, Yongcai and Chen, Maiyue and Wang, Kaihui and Zhang, Bo and Su, Zhizhong and Zhou, Yutian and Li, Wanting and Li, Deying and Fan, Zhaoxin}, title = {Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4076-4086} }
PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage: Thomas Gottwald,

Edgar Heinert,

Peter Stehr,

Chamuditha Jayanga Galappaththige,

Matthias Rottmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gottwald_2026_CVPR, author = {Gottwald, Thomas and Heinert, Edgar and Stehr, Peter and Galappaththige, Chamuditha Jayanga and Rottmann, Matthias}, title = {PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11871-11880} }
Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark: Seng Nam Chen,

Hao Chen,

Chenglam Ho,

Xinyu Mao,

Jinping Wang,

Yu Zhang,

Chao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Seng Nam and Chen, Hao and Ho, Chenglam and Mao, Xinyu and Wang, Jinping and Zhang, Yu and Li, Chao}, title = {Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4515-4525} }
CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion: James Jincheng Hu,

Yuxiao Wu,

Youcheng Cai,

Ligang Liu; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, James Jincheng and Wu, Yuxiao and Cai, Youcheng and Liu, Ligang}, title = {CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5934-5944} }
ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery: Weiqin Jiao,

Hao Cheng,

George Vosselman,

Claudio Persello; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR, author = {Jiao, Weiqin and Cheng, Hao and Vosselman, George and Persello, Claudio}, title = {ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13244-13253} }
Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression: Hamidreza Dastmalchi,

Aijun An,

Ali Cheraghian,

Hamed Barzamini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dastmalchi_2026_CVPR, author = {Dastmalchi, Hamidreza and An, Aijun and Cheraghian, Ali and Barzamini, Hamed}, title = {Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4178-4187} }
PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts: Xianqi Wang,

Hao Yang,

Hangtian Wang,

Junda Cheng,

Gangwei Xu,

Min Lin,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xianqi and Yang, Hao and Wang, Hangtian and Cheng, Junda and Xu, Gangwei and Lin, Min and Yang, Xin}, title = {PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12565-12575} }
Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration: Danil Tokhchukov,

Aysel Mirzoeva,

Andrey Kuznetsov,

Konstantin Sobolev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tokhchukov_2026_CVPR, author = {Tokhchukov, Danil and Mirzoeva, Aysel and Kuznetsov, Andrey and Sobolev, Konstantin}, title = {Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4635-4644} }
Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections: Zhiwen Zheng,

Hao Zhou,

Huiyu Qi,

Zhao Huang,

Guangyuan Zhang,

Shaowei Jiang,

Wenwen Tang,

Bin Yang,

Jin Liu,

Xiaoshuai Zhang,

Xingru Huang; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiwen and Zhou, Hao and Qi, Huiyu and Huang, Zhao and Zhang, Guangyuan and Jiang, Shaowei and Tang, Wenwen and Yang, Bin and Liu, Jin and Zhang, Xiaoshuai and Huang, Xingru}, title = {Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13908-13917} }
CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning: Xiang Fang,

Wanlong Fang,

Changshuo Wang; [pdf]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Xiang and Fang, Wanlong and Wang, Changshuo}, title = {CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7923-7935} }
Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack: Wenwen He,

Wenke Huang,

Yiyang Fang,

Wenjie Qu,

Jiaheng Zhang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Wenwen and Huang, Wenke and Fang, Yiyang and Qu, Wenjie and Zhang, Jiaheng and Ye, Mang}, title = {Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13316-13325} }
Bezier Degradation Modeling for LiDAR-based Human Motion Capture: Xiaoqi An,

Lin Zhao,

Jun Li,

Chen Gong,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{An_2026_CVPR, author = {An, Xiaoqi and Zhao, Lin and Li, Jun and Gong, Chen and Yang, Jian}, title = {Bezier Degradation Modeling for LiDAR-based Human Motion Capture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14027-14037} }
Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance: Qiuhai Yan,

Kang Chen,

Zhengjie Lu,

Tingting Wang,

Faming Fang,

Guixu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, Qiuhai and Chen, Kang and Lu, Zhengjie and Wang, Tingting and Fang, Faming and Zhang, Guixu}, title = {Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2188-2197} }
DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures: Xu Wang,

Zhiru Wang,

Shiyun Xie,

Chengwei Pan,

Yisong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xu and Wang, Zhiru and Xie, Shiyun and Pan, Chengwei and Chen, Yisong}, title = {DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4912-4921} }
ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals: Xuelu Li,

Zhaonan Wang,

Xiaogang Wang,

Lei Wu,

Manyi Li,

Changhe Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xuelu and Wang, Zhaonan and Wang, Xiaogang and Wu, Lei and Li, Manyi and Tu, Changhe}, title = {ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13897-13907} }
Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection: Jianwei Fei,

Yunshu Dai,

Xiaoyu Zhou,

Zhihua Xia,

Alessandro Piva; [pdf]
[bibtex]
@InProceedings{Fei_2026_CVPR, author = {Fei, Jianwei and Dai, Yunshu and Zhou, Xiaoyu and Xia, Zhihua and Piva, Alessandro}, title = {Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14084-14094} }
Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning: Xiaodong Wang,

Zhirong Wu,

Langling Huang,

Yuxi Zheng,

Peixi Peng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaodong and Wu, Zhirong and Huang, Langling and Zheng, Yuxi and Peng, Peixi}, title = {Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5444-5454} }
Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation: Karlis Martins Briedis,

Markus Gross,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Briedis_2026_CVPR, author = {Briedis, Karlis Martins and Gross, Markus and Schroers, Christopher}, title = {Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5700-5709} }
Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance: Songze Li,

Mingyu Gao,

Tonghua Su,

Xu-Yao Zhang,

Zhongjie Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Songze and Gao, Mingyu and Su, Tonghua and Zhang, Xu-Yao and Wang, Zhongjie}, title = {Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10820-10829} }
AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References: Jiahao Wang,

Hualian Sheng,

Sijia Cai,

Yuxiao Yang,

Weizhan Zhang,

Caixia Yan,

Bing Deng,

Jieping Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Sheng, Hualian and Cai, Sijia and Yang, Yuxiao and Zhang, Weizhan and Yan, Caixia and Deng, Bing and Ye, Jieping}, title = {AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12808-12817} }
ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving: Zhiyu Zheng,

Shaoyu Chen,

Haoran Yin,

Xinbang Zhang,

Jialv Zou,

Xinggang Wang,

Qian Zhang,

Lefei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiyu and Chen, Shaoyu and Yin, Haoran and Zhang, Xinbang and Zou, Jialv and Wang, Xinggang and Zhang, Qian and Zhang, Lefei}, title = {ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3729-3739} }
FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration: Yizhou Huang,

Genze Jiang,

Yihua Cheng,

Kezhi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Yizhou and Jiang, Genze and Cheng, Yihua and Wang, Kezhi}, title = {FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3750-3760} }
ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding: Quan Kong,

Yuhao Shen,

Yicheng Ji,

Huan Li,

Cong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR, author = {Kong, Quan and Shen, Yuhao and Ji, Yicheng and Li, Huan and Wang, Cong}, title = {ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11392-11402} }
Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective: Aobo Li,

Jinjian Wu,

Yongxu Liu,

Jupo Ma,

Weisheng Dong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Aobo and Wu, Jinjian and Liu, Yongxu and Ma, Jupo and Dong, Weisheng}, title = {Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1310-1319} }
EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models: Yiyang Fang,

Wenke Huang,

Pei Fu,

Yihao Yang,

Kehua Su,

Zhenbo Luo,

Jian Luan,

Mang Ye; [pdf] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Yiyang and Huang, Wenke and Fu, Pei and Yang, Yihao and Su, Kehua and Luo, Zhenbo and Luan, Jian and Ye, Mang}, title = {EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {745-755} }
Illumination-Consistent Human-Scene Reconstruction from Monocular Video: Rongbin Zheng,

Wensheng Li,

Lingzhe Zeng,

Dong Wang,

Chengying Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Rongbin and Li, Wensheng and Zeng, Lingzhe and Wang, Dong and Gao, Chengying}, title = {Illumination-Consistent Human-Scene Reconstruction from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14050-14061} }
MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture: Hui Li,

Jiayue Lyu,

Fu-Yun Wang,

Kaihui Cheng,

Siyu Zhu,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Hui and Lyu, Jiayue and Wang, Fu-Yun and Cheng, Kaihui and Zhu, Siyu and Wang, Jingdong}, title = {MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9095-9105} }
TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models: Zhiwei Li,

Yitian Pang,

Weining Wang,

Zhenan Sun,

Qi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Zhiwei and Pang, Yitian and Wang, Weining and Sun, Zhenan and Li, Qi}, title = {TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1513-1522} }
DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models: Jin Liu,

Ning Xi,

Yinbin Miao,

Junkang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Jin and Xi, Ning and Miao, Yinbin and Liu, Junkang}, title = {DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3358-3368} }
MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding: Basit Alawode,

Arif Mahmood,

Muaz Khalifa Al Radi,

Shahad Albastaki,

Asim Khan,

Muhammad Bilal,

Moshira Ali Abdalla,

Mohammed Bennamoun,

Sajid Javed; [pdf] [supp]
[bibtex]
@InProceedings{Alawode_2026_CVPR, author = {Alawode, Basit and Mahmood, Arif and Al Radi, Muaz Khalifa and Albastaki, Shahad and Khan, Asim and Bilal, Muhammad and Abdalla, Moshira Ali and Bennamoun, Mohammed and Javed, Sajid}, title = {MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13732-13743} }
Frequency-Aware Flow Matching for High-Quality Image Generation: Sucheng Ren,

Qihang Yu,

Ju He,

Xiaohui Shen,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR, author = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Chen, Liang-Chieh}, title = {Frequency-Aware Flow Matching for High-Quality Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9074-9083} }
SAGE: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains: Qingmei Li,

Yang Zhang,

Peifeng Zhang,

Haohuan Fu,

Juepeng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qingmei and Zhang, Yang and Zhang, Peifeng and Fu, Haohuan and Zheng, Juepeng},
  title     = {{SAGE}: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13134--13144},
}
Rethinking Dataset Distillation: Hard Truths about Soft Labels: Priyam Dey,

Aditya Sahdev,

Sunny Bhati,

Konda Reddy Mopuri,

Venkatesh Babu Radhakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dey_2026_CVPR,
  author    = {Dey, Priyam and Sahdev, Aditya and Bhati, Sunny and Mopuri, Konda Reddy and Radhakrishnan, Venkatesh Babu},
  title     = {Rethinking Dataset Distillation: Hard Truths about Soft Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {178--187},
}
LIFT and PLACE: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models: Hyunsoo Han,

Sangyeop Yeo,

Jaejun Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Hyunsoo and Yeo, Sangyeop and Yoo, Jaejun},
  title     = {{LIFT} and {PLACE}: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5564--5573},
}
EditCtrl: Disentangled Local and Global Control for Real-Time Generative Video Editing: Yehonathan Litman,

Shikun Liu,

Dario Seyb,

Nicholas Milef,

Yang Zhou,

Carl Marshall,

Shubham Tulsiani,

Caleb Leak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Litman_2026_CVPR,
  author    = {Litman, Yehonathan and Liu, Shikun and Seyb, Dario and Milef, Nicholas and Zhou, Yang and Marshall, Carl and Tulsiani, Shubham and Leak, Caleb},
  title     = {{EditCtrl}: Disentangled Local and Global Control for Real-Time Generative Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8965--8975},
}
Masking Teacher and Reinforcing Student for Distilling Vision-Language Models: Byung-Kwan Lee,

Yu-Chiang Frank Wang,

Ryo Hachiuma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Byung-Kwan and Wang, Yu-Chiang Frank and Hachiuma, Ryo},
  title     = {Masking Teacher and Reinforcing Student for Distilling Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10126--10141},
}
DextER: Language-driven Dexterous Grasp Generation with Embodied Reasoning: Junha Lee,

Eunha Park,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junha and Park, Eunha and Cho, Minsu},
  title     = {{DextER}: Language-driven Dexterous Grasp Generation with Embodied Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1067--1077},
}
Contact-Aware Neural Dynamics: Changwei Jing,

Jai Krishna Bandi,

Jianglong Ye,

Yan Duan,

Pieter Abbeel,

Xiaolong Wang,

Sha Yi; [pdf] [arXiv]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Changwei and Bandi, Jai Krishna and Ye, Jianglong and Duan, Yan and Abbeel, Pieter and Wang, Xiaolong and Yi, Sha},
  title     = {Contact-Aware Neural Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13442--13452},
}
CASR: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness: Wenhao Guo,

Zhaoran Zhao,

Peng Lu,

Sheng Li,

Qian Qiao,

RuiDe Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Wenhao and Zhao, Zhaoran and Lu, Peng and Li, Sheng and Qiao, Qian and Li, RuiDe},
  title     = {{CASR}: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2136--2145},
}
FlashMotion: Few-Step Controllable Video Generation with Trajectory Guidance: Quanhao Li,

Zhen Xing,

Rui Wang,

Haidong Cao,

Qi Dai,

Daoguo Dong,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Quanhao and Xing, Zhen and Wang, Rui and Cao, Haidong and Dai, Qi and Dong, Daoguo and Wu, Zuxuan},
  title     = {{FlashMotion}: Few-Step Controllable Video Generation with Trajectory Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8986--8996},
}
Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning: Linjie Li,

Huiyu Xiao,

Jiarui Cao,

Zhenyu Wu,

Yang Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Linjie and Xiao, Huiyu and Cao, Jiarui and Wu, Zhenyu and Ji, Yang},
  title     = {Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3920--3929},
}
See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in LVLMs: Yongchang Zhang,

Oliver Ma,

Tianyi Liu,

Guangquan Zhou,

Yang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yongchang and Ma, Oliver and Liu, Tianyi and Zhou, Guangquan and Chen, Yang},
  title     = {See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11933--11942},
}
HERBench: A Benchmark for Multi-Evidence Integration in Video Question Answering: Dan Ben Ami,

Gabriele Serussi,

Kobi Cohen,

Chaim Baskin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ben_Ami_2026_CVPR,
  author    = {Ben Ami, Dan and Serussi, Gabriele and Cohen, Kobi and Baskin, Chaim},
  title     = {{HERBench}: A Benchmark for Multi-Evidence Integration in Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4505--4514},
}
LAMP: Language-Assisted Motion Planning for Controllable Video Generation: Muhammed Burak Kizil,

Enes Sanli,

Niloy J. Mitra,

Erkut Erdem,

Aykut Erdem,

Duygu Ceylan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kizil_2026_CVPR,
  author    = {Kizil, Muhammed Burak and Sanli, Enes and Mitra, Niloy J. and Erdem, Erkut and Erdem, Aykut and Ceylan, Duygu},
  title     = {{LAMP}: Language-Assisted Motion Planning for Controllable Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12829--12838},
}
MMBench-GUI: A Unified Hierarchical Evaluation Framework for Multi-Platform GUI Agents: Xuehui Wang,

Zhenyu Wu,

JingJing Xie,

Zichen Ding,

Bowen Yang,

Zehao Li,

Zhaoyang Liu,

Qingyun Li,

Xuan Dong,

Zhe Chen,

Weiyun Wang,

Xiangyu Zhao,

Jixuan Chen,

Haodong Duan,

Tianbao Xie,

Chenyu Yang,

Shiqian Su,

Yue Yu,

Yanting Zhang,

Xiangyu Yue,

Weijie Su,

Xizhou Zhu,

Wei Shen,

Jifeng Dai,

Wenhai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuehui and Wu, Zhenyu and Xie, JingJing and Ding, Zichen and Yang, Bowen and Li, Zehao and Liu, Zhaoyang and Li, Qingyun and Dong, Xuan and Chen, Zhe and Wang, Weiyun and Zhao, Xiangyu and Chen, Jixuan and Duan, Haodong and Xie, Tianbao and Yang, Chenyu and Su, Shiqian and Yu, Yue and Zhang, Yanting and Yue, Xiangyu and Su, Weijie and Zhu, Xizhou and Shen, Wei and Dai, Jifeng and Wang, Wenhai},
  title     = {{MMBench-GUI}: A Unified Hierarchical Evaluation Framework for Multi-Platform {GUI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6239--6248},
}
DETACH : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning: Junho Yoon,

Jaemo Jeong,

Hyunju Kim,

Dongman Lee; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Junho and Jeong, Jaemo and Kim, Hyunju and Lee, Dongman},
  title     = {{DETACH} : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12860--12870},
}
ViStoryBench: Comprehensive Benchmark Suite for Story Visualization: Cailin Zhuang,

Ailin Huang,

Yaoqi Hu,

Jingwei Wu,

Wei Cheng,

Jiaqi Liao,

Hongyuan Wang,

Xinyao Liao,

Weiwei Cai,

Hengyuan Xu,

Xuanyang Zhang,

Xianfang Zeng,

Zhewei Huang,

Gang Yu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Cailin and Huang, Ailin and Hu, Yaoqi and Wu, Jingwei and Cheng, Wei and Liao, Jiaqi and Wang, Hongyuan and Liao, Xinyao and Cai, Weiwei and Xu, Hengyuan and Zhang, Xuanyang and Zeng, Xianfang and Huang, Zhewei and Yu, Gang and Zhang, Chi},
  title     = {{ViStoryBench}: Comprehensive Benchmark Suite for Story Visualization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9455--9467},
}
Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning: Minghe Gao,

Juncheng Li,

Yuze Lin,

Xuqi Liu,

Jiaming Ji,

Xiaoran Pan,

Zihan Xu,

Xian Li,

Mingjie Li,

Wei Ji,

Rong Wei,

Rui Tang,

Qizhou Wang,

Kai Shen,

Jun Xiao,

Qi Wu,

Siliang Tang,

Yueting Zhuang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Minghe and Li, Juncheng and Lin, Yuze and Liu, Xuqi and Ji, Jiaming and Pan, Xiaoran and Xu, Zihan and Li, Xian and Li, Mingjie and Ji, Wei and Wei, Rong and Tang, Rui and Wang, Qizhou and Shen, Kai and Xiao, Jun and Wu, Qi and Tang, Siliang and Zhuang, Yueting},
  title     = {Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1031--1040},
}
Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera: Mukai Yu,

Mosam Dabhi,

Liuyue Xie,

Sebastian Scherer,

László A. Jeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Mukai and Dabhi, Mosam and Xie, Liuyue and Scherer, Sebastian and Jeni, L{\'a}szl{\'o} A.},
  title     = {Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6106--6115},
}
RADAR: VQ-VAE Decoder of VAR is a Good Student for Restoring Against Degradation by Acceleration: Ziyang Wang,

Yue Zhang,

Mingdao Wang,

Yasen Zhang,

Teer Song,

Yu Tian,

Xueming Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhang, Yue and Wang, Mingdao and Zhang, Yasen and Song, Teer and Tian, Yu and Li, Xueming},
  title     = {{RADAR}: {VQ-VAE} Decoder of {VAR} is a Good Student for Restoring Against Degradation by Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5273--5282},
}
Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation: Junyuan Ma,

Xunzhi Xiang,

Wenbin Li,

Qi Fan,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Junyuan and Xiang, Xunzhi and Li, Wenbin and Fan, Qi and Gao, Yang},
  title     = {Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12385--12395},
}
LDP-Slicing: Local Differential Privacy for Images via Randomized Bit-Plane Slicing: Yuanming Cao,

Chengqi Li,

Wenbo He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yuanming and Li, Chengqi and He, Wenbo},
  title     = {{LDP-Slicing}: Local Differential Privacy for Images via Randomized Bit-Plane Slicing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {92--101},
}
MERLIN: Building Low-SNR Robust Multimodal LLMs for Electromagnetic Signals: Junyu Shen,

Zhendong She,

Chenghanyu Zhang,

Yuchuang Sun,

Luqing Luo,

Dingwei Tan,

Zonghao Guo,

Bo Guo,

Zehua Han,

Wupeng Xie,

Yaxin Mu,

Peng Zhang,

Peipei Li,

Fengxiang Wang,

Yangang Sun,

Maosong Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Junyu and She, Zhendong and Zhang, Chenghanyu and Sun, Yuchuang and Luo, Luqing and Tan, Dingwei and Guo, Zonghao and Guo, Bo and Han, Zehua and Xie, Wupeng and Mu, Yaxin and Zhang, Peng and Li, Peipei and Wang, Fengxiang and Sun, Yangang and Sun, Maosong},
  title     = {{MERLIN}: Building {Low-SNR} Robust Multimodal {LLMs} for Electromagnetic Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8653--8663},
}
Think Before You Drive: World Model-Inspired Multimodal Grounding: Haicheng Liao,

Huanming Shen,

Bonan Wang,

Yongkang Li,

Yihong Tang,

Chengyue Wang,

Dingyi Zhuang,

Kehua Chen,

Hai Yang,

Chengzhong Xu,

Zhenning Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Haicheng and Shen, Huanming and Wang, Bonan and Li, Yongkang and Tang, Yihong and Wang, Chengyue and Zhuang, Dingyi and Chen, Kehua and Yang, Hai and Xu, Chengzhong and Li, Zhenning},
  title     = {Think Before You Drive: World Model-Inspired Multimodal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3677--3687},
}
Align Once to Explain: Feature Alignment for Scalable B-cosification of Foundational Vision Transformers: Raphael Maser,

Siddhartha Gairola,

Sukrut Rao,

Bernt Schiele; [pdf] [supp]
[bibtex]
@InProceedings{Maser_2026_CVPR,
  author    = {Maser, Raphael and Gairola, Siddhartha and Rao, Sukrut and Schiele, Bernt},
  title     = {Align Once to Explain: Feature Alignment for Scalable {B-cosification} of Foundational Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9869--9879},
}
PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image: Ziang Cao,

Fangzhou Hong,

Zhaoxi Chen,

Liang Pan,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Ziang and Hong, Fangzhou and Chen, Zhaoxi and Pan, Liang and Liu, Ziwei},
  title     = {{PhysX-Anything}: Simulation-Ready Physical {3D} Assets from Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5839--5848},
}
HoloCine: Holistic Generation of Cinematic Multi-Shot Long Video Narratives: Yihao Meng,

Hao Ouyang,

Yue Yu,

Qiuyu Wang,

Wen Wang,

Ka Leong Cheng,

Hanlin Wang,

Shuailei Ma,

Yixuan Li,

Cheng Chen,

Yanhong Zeng,

Xing Zhu,

Yujun Shen,

Huamin Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Yihao and Ouyang, Hao and Yu, Yue and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Wang, Hanlin and Ma, Shuailei and Li, Yixuan and Chen, Cheng and Zeng, Yanhong and Zhu, Xing and Shen, Yujun and Qu, Huamin},
  title     = {{HoloCine}: Holistic Generation of Cinematic Multi-Shot Long Video Narratives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {461--471},
}
S$^2$-MLLM: Boosting Spatial Reasoning Capability of MLLMs for 3D Visual Grounding with Structural Guidance: Beining Xu,

Siting Zhu,

Zhao Jin,

Junxian Li,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Beining and Zhu, Siting and Jin, Zhao and Li, Junxian and Wang, Hesheng},
  title     = {{S$^2$-MLLM}: Boosting Spatial Reasoning Capability of {MLLMs} for {3D} Visual Grounding with Structural Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2557--2569},
}
Region-Adaptive Sampling for Diffusion Transformers: Ziming Liu,

Yifan Yang,

Chengruidong Zhang,

Yiqi Zhang,

Lili Qiu,

Yang You,

Yuqing Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ziming and Yang, Yifan and Zhang, Chengruidong and Zhang, Yiqi and Qiu, Lili and You, Yang and Yang, Yuqing},
  title     = {Region-Adaptive Sampling for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2346--2356},
}
Spatial-SSRL: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning: Yuhong Liu,

Beichen Zhang,

Yuhang Zang,

Yuhang Cao,

Long Xing,

Xiaoyi Dong,

Haodong Duan,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuhong and Zhang, Beichen and Zang, Yuhang and Cao, Yuhang and Xing, Long and Dong, Xiaoyi and Duan, Haodong and Lin, Dahua and Wang, Jiaqi},
  title     = {{Spatial-SSRL}: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9570--9581},
}
SoliReward: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models: Jiesong Lian,

Ruizhe Zhong,

Zixiang Zhou,

Xiaoyue Mi,

Long Hu,

Yuan Zhou,

Qinglin Lu,

Yixue Hao,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Jiesong and Zhong, Ruizhe and Zhou, Zixiang and Mi, Xiaoyue and Hu, Long and Zhou, Yuan and Lu, Qinglin and Hao, Yixue and Yan, Junchi},
  title     = {{SoliReward}: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12798--12807},
}
Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing: Weitong Cai,

Hang Zhang,

Yukai Huang,

Shitong Sun,

Jiankang Deng,

Songcen Xu,

Jifei Song,

Zhensong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Weitong and Zhang, Hang and Huang, Yukai and Sun, Shitong and Deng, Jiankang and Xu, Songcen and Song, Jifei and Zhang, Zhensong},
  title     = {Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9784--9793},
}
UniSpector: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting: Geonuk Kim,

Minhoi Kim,

Kangil Lee,

Minsu Kim,

Hyeonseong Jeon,

Jeonghoon Han,

Hyoungjoon Lim,

Junho Yim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Geonuk and Kim, Minhoi and Lee, Kangil and Kim, Minsu and Jeon, Hyeonseong and Han, Jeonghoon and Lim, Hyoungjoon and Yim, Junho},
  title     = {{UniSpector}: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6261--6270},
}
Globscope: Toward a Global View of the Loss Landscape: Mashiat Mustaq,

Xavier M. Tricoche; [pdf] [supp]
[bibtex]
@InProceedings{Mustaq_2026_CVPR,
  author    = {Mustaq, Mashiat and Tricoche, Xavier M.},
  title     = {Globscope: Toward a Global View of the Loss Landscape},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5263--5272},
}
PaNDaS: Learnable Shape Interpolation Modeling with Localized Control: Thomas Besnier,

Emery Pierson,

Sylvain Arguillere,

Maks Ovsjanikov,

Mohamed Daoudi; [pdf] [supp]
[bibtex]
@InProceedings{Besnier_2026_CVPR,
  author    = {Besnier, Thomas and Pierson, Emery and Arguillere, Sylvain and Ovsjanikov, Maks and Daoudi, Mohamed},
  title     = {{PaNDaS}: Learnable Shape Interpolation Modeling with Localized Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13102--13112},
}
Den-TP: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction: Ruining Yang,

Yi Xu,

Yun Fu,

Lili Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ruining and Xu, Yi and Fu, Yun and Su, Lili},
  title     = {{Den-TP}: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10632--10641},
}
iSHIFT: Lightweight Slow-Fast GUI Agent with Adaptive Perception: Sarthak Mehrotra,

Sairam VC Rebbapragada,

Mani Bonthu,

Vineeth N. Balasubramanian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehrotra_2026_CVPR,
  author    = {Mehrotra, Sarthak and Rebbapragada, Sairam VC and Bonthu, Mani and Balasubramanian, Vineeth N.},
  title     = {{iSHIFT}: Lightweight Slow-Fast {GUI} Agent with Adaptive Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6229--6238},
}
PG-VTON: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment: Guohao Zhao,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Guohao and Peng, Yuxin},
  title     = {{PG-VTON}: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7859--7868},
}
Label-Free Cross-Task LoRA Merging with Null-Space Compression: Wonyoung Lee,

Wooseong Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Wonyoung and Jeong, Wooseong and Yoon, Kuk-Jin},
  title     = {Label-Free Cross-Task {LoRA} Merging with Null-Space Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {847--859},
}
An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning: Quyen Tran,

Hai Nguyen,

Quan Dao,

Hoang Phan,

Linh Van,

Khoat Than,

Dinh Phung,

Dimitris Metaxas,

Trung Le; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Quyen and Nguyen, Hai and Dao, Quan and Phan, Hoang and Van, Linh and Than, Khoat and Phung, Dinh and Metaxas, Dimitris and Le, Trung},
  title     = {An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10851--10862},
}
VisionDirector: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis: Meng Chu,

Senqiao Yang,

Haoxuan Che,

Suiyun Zhang,

Xichen Zhang,

Shaozuo Yu,

Haokun Gui,

Zhefan Rao,

Dandan Tu,

Rui Liu,

Jiaya Jia; [pdf] [arXiv]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Meng and Yang, Senqiao and Che, Haoxuan and Zhang, Suiyun and Zhang, Xichen and Yu, Shaozuo and Gui, Haokun and Rao, Zhefan and Tu, Dandan and Liu, Rui and Jia, Jiaya},
  title     = {{VisionDirector}: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9203--9212},
}
PosterOmni: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback: Sixiang Chen,

Jianyu Lai,

Jialin Gao,

Hengyu Shi,

Zhongying Liu,

Tian Ye,

Junfeng Luo,

Xiaoming Wei,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Sixiang and Lai, Jianyu and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Ye, Tian and Luo, Junfeng and Wei, Xiaoming and Zhu, Lei},
  title     = {{PosterOmni}: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5978--5987},
}
BridgeEQA: Virtual Embodied Agents for Real Bridge Inspections: Subin Varghese,

Joshua Gao,

Asad Ur Rahman,

Vedhus Hoskere; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Varghese_2026_CVPR,
  author    = {Varghese, Subin and Gao, Joshua and Rahman, Asad Ur and Hoskere, Vedhus},
  title     = {{BridgeEQA}: Virtual Embodied Agents for Real Bridge Inspections},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8163--8173},
}
A Unified Perspective on Adversarial Membership Manipulation in Vision Models: Ruize Gao,

Kaiwen Zhou,

Yongqiang Chen,

Feng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Ruize and Zhou, Kaiwen and Chen, Yongqiang and Liu, Feng},
  title     = {A Unified Perspective on Adversarial Membership Manipulation in Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1554--1564},
}
Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions: Seongyu Kim,

Seungwoo Lee,

Hyeonggon Ryu,

Joon Son Chung,

Arda Senocak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seongyu and Lee, Seungwoo and Ryu, Hyeonggon and Chung, Joon Son and Senocak, Arda},
  title     = {Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8717--8726},
}
Data-Centric Meta-Learning for Robust Few-Shot Generalization: Jongmin Lim,

Soobin Cha,

Jaehun Park,

Inho Oh,

Minho Park,

Kwangsu Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Jongmin and Cha, Soobin and Park, Jaehun and Oh, Inho and Park, Minho and Kim, Kwangsu},
  title     = {Data-Centric Meta-Learning for Robust Few-Shot Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5543--5552},
}
MICON-Bench: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models: Mingrui Wu,

Hang Liu,

Jiayi Ji,

Xiaoshuai Sun,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mingrui and Liu, Hang and Ji, Jiayi and Sun, Xiaoshuai and Ji, Rongrong},
  title     = {{MICON-Bench}: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8227--8236},
}
Scaling4D: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos: Hongrui Cai,

Junjie Luo,

Zhihong Fu,

Shengnan Zhu,

Jiawei Wen,

Wanquan Feng,

Songtao Zhao,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Hongrui and Luo, Junjie and Fu, Zhihong and Zhu, Shengnan and Wen, Jiawei and Feng, Wanquan and Zhao, Songtao and He, Qian},
  title     = {{Scaling4D}: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11174--11184},
}
Chain of World: World Model Thinking in Latent Motion: Fuxiang Yang,

Donglin Di,

Lulu Tang,

Xuancheng Zhang,

Lei Fan,

Hao Li,

Wei Chen,

Tonghua Su,

Baorui Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Fuxiang and Di, Donglin and Tang, Lulu and Zhang, Xuancheng and Fan, Lei and Li, Hao and Chen, Wei and Su, Tonghua and Ma, Baorui},
  title     = {Chain of World: World Model Thinking in Latent Motion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6675--6684},
}
MimiCAT: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free 3D Pose Transfer: Zenghao Chai,

Chen Tang,

Yongkang Wong,

Xulei Yang,

Mohan Kankanhalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chai_2026_CVPR,
  author    = {Chai, Zenghao and Tang, Chen and Wong, Yongkang and Yang, Xulei and Kankanhalli, Mohan},
  title     = {{MimiCAT}: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free {3D} Pose Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13962--13973},
}
Inferring Compositional 4D Scenes without Ever Seeing One: Ahmet Berke Gökmen,

Ajad Chhatkuli,

Luc Van Gool,

Danda Pani Paudel; [pdf] [supp]
[bibtex]
@InProceedings{Gokmen_2026_CVPR,
  author    = {G{\"o}kmen, Ahmet Berke and Chhatkuli, Ajad and Van Gool, Luc and Paudel, Danda Pani},
  title     = {Inferring Compositional {4D} Scenes without Ever Seeing One},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {317--329},
}
Multi-Crit: Benchmarking Multimodal Judges on Pluralistic Criteria-Following: Tianyi Xiong,

Yi Ge,

Ming Li,

Zuolong Zhang,

Pranav Kulkarni,

Kaishen Wang,

Qi He,

Zeying Zhu,

Chenxi Liu,

Ruibo Chen,

Tong Zheng,

Yanshuo Chen,

Xiyao Wang,

Renrui Zhang,

Wenhu Chen,

Heng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Tianyi and Ge, Yi and Li, Ming and Zhang, Zuolong and Kulkarni, Pranav and Wang, Kaishen and He, Qi and Zhu, Zeying and Liu, Chenxi and Chen, Ruibo and Zheng, Tong and Chen, Yanshuo and Wang, Xiyao and Zhang, Renrui and Chen, Wenhu and Huang, Heng},
  title     = {{Multi-Crit}: Benchmarking Multimodal Judges on Pluralistic Criteria-Following},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8641--8652},
}
OneThinker: All-in-one Reasoning Model for Image and Video: Kaituo Feng,

Manyuan Zhang,

Hongyu Li,

Kaixuan Fan,

Shuang Chen,

Yilei Jiang,

Dian Zheng,

Peiwen Sun,

Yiyuan Zhang,

Haoze Sun,

Yan Feng,

Peng Pei,

Xunliang Cai,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Kaituo and Zhang, Manyuan and Li, Hongyu and Fan, Kaixuan and Chen, Shuang and Jiang, Yilei and Zheng, Dian and Sun, Peiwen and Zhang, Yiyuan and Sun, Haoze and Feng, Yan and Pei, Peng and Cai, Xunliang and Yue, Xiangyu},
  title     = {{OneThinker}: All-in-one Reasoning Model for Image and Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5432--5443},
}
On Token's Dilemma: Dynamic MoE with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models: Chongyang Zhao,

Mingsong Li,

Haodong Lu,

Dong Gong; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chongyang and Li, Mingsong and Lu, Haodong and Gong, Dong},
  title     = {On Token's Dilemma: Dynamic {MoE} with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3941--3952},
}
VES-RFT: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models: Xuehe Hou,

Wenshuo Li,

Yali Li,

Han Shu,

Yuan Wang,

Xinghao Chen,

Shengjin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xuehe and Li, Wenshuo and Li, Yali and Shu, Han and Wang, Yuan and Chen, Xinghao and Wang, Shengjin},
  title     = {{VES-RFT}: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4168--4177},
}
Bidirectional Normalizing Flow: From Data to Noise and Back: Yiyang Lu,

Qiao Sun,

Xianbang Wang,

Zhicheng Jiang,

Hanhong Zhao,

Kaiming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yiyang and Sun, Qiao and Wang, Xianbang and Jiang, Zhicheng and Zhao, Hanhong and He, Kaiming},
  title     = {Bidirectional Normalizing Flow: From Data to Noise and Back},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2069--2078},
}
4C4D: 4 Camera 4D Gaussian Splatting: Junsheng Zhou,

Zhifan Yang,

Liang Han,

Wenyuan Zhang,

Kanle Shi,

Shenkun Xu,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Junsheng and Yang, Zhifan and Han, Liang and Zhang, Wenyuan and Shi, Kanle and Xu, Shenkun and Liu, Yu-Shen},
  title     = {{4C4D}: 4 Camera {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11829--11839},
}
CROWn: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in 3D Medical Segmentation: Xingru Huang,

Shuanghua Ye,

Zhao Huang,

Wenwen Tang,

Huiyu Zhou,

Zhiwen Zheng,

Jin Liu,

Xiaoshuai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xingru and Ye, Shuanghua and Huang, Zhao and Tang, Wenwen and Zhou, Huiyu and Zheng, Zhiwen and Liu, Jin and Zhang, Xiaoshuai},
  title     = {{CROWn}: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in {3D} Medical Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8514--8524},
}
Chain-of-Frames: Advancing Video Understanding in Multimodal LLMs via Frame-Aware Reasoning: Sara Ghazanfari,

Francesco Croce,

Nicolas Flammarion,

Prashanth Krishnamurthy,

Farshad Khorrami,

Siddharth Garg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghazanfari_2026_CVPR,
  author    = {Ghazanfari, Sara and Croce, Francesco and Flammarion, Nicolas and Krishnamurthy, Prashanth and Khorrami, Farshad and Garg, Siddharth},
  title     = {{Chain-of-Frames}: Advancing Video Understanding in Multimodal {LLMs} via Frame-Aware Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2746--2755},
}
Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment: Dongjun Liu,

Weichen Dai,

Jingsheng Qian,

Honggang Liu,

Hangjie Yi,

Wanzeng Kong; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Dongjun and Dai, Weichen and Qian, Jingsheng and Liu, Honggang and Yi, Hangjie and Kong, Wanzeng},
  title     = {Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7869--7878},
}
What Matters in Practical Learned Image Compression: Kedar Tatwawadi,

Parisa Rahimzadeh,

Zhanghao Sun,

Zhiqi Chen,

Ziyun Yang,

Sanjay Nair,

Divija Hasteer,

Oren Rippel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tatwawadi_2026_CVPR,
  author    = {Tatwawadi, Kedar and Rahimzadeh, Parisa and Sun, Zhanghao and Chen, Zhiqi and Yang, Ziyun and Nair, Sanjay and Hasteer, Divija and Rippel, Oren},
  title     = {What Matters in Practical Learned Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12095--12105},
}
DeepAlign: Mitigating Modality Conflict through Modality-Specific Alignment: Shuo Li,

Bingchen Miao,

Wendong Bu,

Juncheng Li,

Hanwang Zhang,

Fei Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuo and Miao, Bingchen and Bu, Wendong and Li, Juncheng and Zhang, Hanwang and Wu, Fei},
  title     = {{DeepAlign}: Mitigating Modality Conflict through Modality-Specific Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7847--7858},
}
G$^2$VLM: Geometry Grounded Vision Language Model with Unified 3D Reconstruction and Spatial Reasoning: Wenbo Hu,

Jingli Lin,

Yilin Long,

Yunlong Ran,

Lihan Jiang,

Yifan Wang,

Chenming Zhu,

Runsen Xu,

Tai Wang,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Wenbo and Lin, Jingli and Long, Yilin and Ran, Yunlong and Jiang, Lihan and Wang, Yifan and Zhu, Chenming and Xu, Runsen and Wang, Tai and Pang, Jiangmiao},
  title     = {{G$^2$VLM}: Geometry Grounded Vision Language Model with Unified {3D} Reconstruction and Spatial Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9535--9546},
}
ViT$^3$: Unlocking Test-Time Training in Vision: Dongchen Han,

Yining Li,

Tianyu Li,

Zixuan Cao,

Ziming Wang,

Jun Song,

Yu Cheng,

Bo Zheng,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Dongchen and Li, Yining and Li, Tianyu and Cao, Zixuan and Wang, Ziming and Song, Jun and Cheng, Yu and Zheng, Bo and Huang, Gao},
  title     = {{ViT$^3$}: Unlocking Test-Time Training in Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {51--61},
}
CG-Reasoner: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric: Lakshmikar Reddy Polamreddy,

Ming Ma; [pdf] [supp]
[bibtex]
@InProceedings{Polamreddy_2026_CVPR,
  author    = {Polamreddy, Lakshmikar Reddy and Ma, Ming},
  title     = {{CG-Reasoner}: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1472--1481},
}
Learning to Select Visual Tools from Experience: Zeyi Huang,

Yuyang Ji,

Anirudh Sundara Rajan,

Zefan Cai,

Wen Xiao,

Haohan Wang,

Junjie Hu,

Yong Jae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zeyi and Ji, Yuyang and Rajan, Anirudh Sundara and Cai, Zefan and Xiao, Wen and Wang, Haohan and Hu, Junjie and Lee, Yong Jae},
  title     = {Learning to Select Visual Tools from Experience},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4783--4793},
}
UAVLight: A Benchmark for Illumination-Robust 3D Reconstruction in Unmanned Aerial Vehicle (UAV) Scenes: Kang Du,

Xue Liao,

Junpeng Xia,

Chaozheng Guo,

Yi Gu,

Yirui Guan,

Duotun Wang,

Sheng Huang,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Kang and Liao, Xue and Xia, Junpeng and Guo, Chaozheng and Gu, Yi and Guan, Yirui and Wang, Duotun and Huang, Sheng and Wang, Zeyu},
  title     = {{UAVLight}: A Benchmark for Illumination-Robust {3D} Reconstruction in Unmanned Aerial Vehicle ({UAV}) Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5670--5679},
}
DemoFunGrasp: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning: Chuan Mao,

Haoqi Yuan,

Ziye Huang,

Chaoyi Xu,

Kai Ma,

Zongqing Lu; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Chuan and Yuan, Haoqi and Huang, Ziye and Xu, Chaoyi and Ma, Kai and Lu, Zongqing},
  title     = {{DemoFunGrasp}: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {986--995},
}
DABO: Difficulty-Aware Bayesian Optimization with Diffusion-Learned Priors: Mengyang Li,

Pinlong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengyang and Zhao, Pinlong},
  title     = {{DABO}: Difficulty-Aware {Bayesian} Optimization with Diffusion-Learned Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6126--6135},
}
Advancing Image Classification with Discrete Diffusion Classification Modeling: Omer Belhasin,

Shelly Golan,

Ran El-Yaniv,

Michael Elad; [pdf] [supp]
[bibtex]
@InProceedings{Belhasin_2026_CVPR,
  author    = {Belhasin, Omer and Golan, Shelly and El-Yaniv, Ran and Elad, Michael},
  title     = {Advancing Image Classification with Discrete Diffusion Classification Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {124--134},
}
CoopDiff: A Diffusion-Guided Approach for Cooperation under Corruptions: Gong Chen,

Chaokun Zhang,

Pengcheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Gong and Zhang, Chaokun and Lv, Pengcheng},
  title     = {{CoopDiff}: A Diffusion-Guided Approach for Cooperation under Corruptions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11546--11555},
}
RecoverMark: Robust Watermarking for Localization and Recovery of Manipulated Faces: Haonan An,

Xiaohui Ye,

Guang Hua,

Yihang Tao,

Hangcheng Cao,

Xiangyu Yu,

Yuguang Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Haonan and Ye, Xiaohui and Hua, Guang and Tao, Yihang and Cao, Hangcheng and Yu, Xiangyu and Fang, Yuguang},
  title     = {{RecoverMark}: Robust Watermarking for Localization and Recovery of Manipulated Faces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8587--8597},
}
Scaling Zero-Shot Reference-to-Video Generation: Zijian Zhou,

Shikun Liu,

Haozhe Liu,

Haonan Qiu,

Zhaochong An,

Weiming Ren,

Zhiheng Liu,

Xiaoke Huang,

Kam-Woh Ng,

Tian Xie,

Xiao Han,

Yuren Cong,

Hang Li,

Chuyan Zhu,

Aditya Patel,

Tao Xiang,

Sen He; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zijian and Liu, Shikun and Liu, Haozhe and Qiu, Haonan and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Huang, Xiaoke and Ng, Kam-Woh and Xie, Tian and Han, Xiao and Cong, Yuren and Li, Hang and Zhu, Chuyan and Patel, Aditya and Xiang, Tao and He, Sen},
  title     = {Scaling Zero-Shot Reference-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9253--9262},
}
$\oslash$ Source Models Leak What They Shouldn't $\nrightarrow$: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization: Arnav Devalapally,

Poornima Jain,

Kartik Srinivas,

Vineeth N. Balasubramanian; [pdf] [supp]
[bibtex]
@InProceedings{Devalapally_2026_CVPR,
  author    = {Devalapally, Arnav and Jain, Poornima and Srinivas, Kartik and Balasubramanian, Vineeth N.},
  title     = {{$\oslash$} Source Models Leak What They Shouldn't {$\nrightarrow$}: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1543--1553},
}
Spectral-Geometric Neural Fields for Pose-Free LiDAR View Synthesis: Yinuo Jiang,

Jun Cheng,

Yiran Wang,

Cheng Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yinuo and Cheng, Jun and Wang, Yiran and Cheng, Cheng},
  title     = {Spectral-Geometric Neural Fields for Pose-Free {LiDAR} View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2993--3003},
}
VDOT: Efficient Unified Video Creation via Optimal Transport Distillation: Yutong Wang,

Haiyu Zhang,

Tianfan Xue,

Yu Qiao,

Yaohui Wang,

Chang Xu,

Xinyuan Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yutong and Zhang, Haiyu and Xue, Tianfan and Qiao, Yu and Wang, Yaohui and Xu, Chang and Chen, Xinyuan},
  title     = {{VDOT}: Efficient Unified Video Creation via Optimal Transport Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9273--9283},
}
Hyperbolic Relational Prompts for Intersectional Fairness in Medical VLMs: Jiayu Qian,

Zongxian Yang,

Guanxing Chen,

Pengwei Hu,

KC Tan,

Yan Wang,

Yu-An Huang,

Zhi-An Huang; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Jiayu and Yang, Zongxian and Chen, Guanxing and Hu, Pengwei and Tan, KC and Wang, Yan and Huang, Yu-An and Huang, Zhi-An},
  title     = {Hyperbolic Relational Prompts for Intersectional Fairness in Medical {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13712--13721},
}
Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule: Boyu Jiang,

Liang Shi,

Zhengzhi Lin,

Lanxin Xiang,

Loren Stowe,

Feng Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Boyu and Shi, Liang and Lin, Zhengzhi and Xiang, Lanxin and Stowe, Loren and Guo, Feng},
  title     = {Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4829--4838},
}
UniCorrn: Unified Correspondence Transformer Across 2D and 3D: Prajnan Goswami,

Tianye Ding,

Feng Liu,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goswami_2026_CVPR,
  author    = {Goswami, Prajnan and Ding, Tianye and Liu, Feng and Jiang, Huaizu},
  title     = {{UniCorrn}: Unified Correspondence Transformer Across {2D} and {3D}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9943--9954},
}
DrivePI: Spatial-aware 4D MLLM for Unified Autonomous Driving Understanding, Perception, Prediction and Planning: Zhe Liu,

Runhui Huang,

Rui Yang,

Siming Yan,

Zining Wang,

Lu Hou,

Di Lin,

Xiang Bai,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhe and Huang, Runhui and Yang, Rui and Yan, Siming and Wang, Zining and Hou, Lu and Lin, Di and Bai, Xiang and Zhao, Hengshuang},
  title     = {{DrivePI}: Spatial-aware {4D} {MLLM} for Unified Autonomous Driving Understanding, Perception, Prediction and Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3688--3698},
}
WOD-E2E: Waymo Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios: Runsheng Xu,

Hubert Lin,

Wonseok Jeon,

Hao Feng,

Yuliang Zou,

Liting Sun,

John Gorman,

Kate Tolstaya,

Sarah Tang,

Brandyn White,

Ben Sapp,

Mingxing Tan,

Jyh-Jing Hwang,

Dragomir Anguelov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Runsheng and Lin, Hubert and Jeon, Wonseok and Feng, Hao and Zou, Yuliang and Sun, Liting and Gorman, John and Tolstaya, Kate and Tang, Sarah and White, Brandyn and Sapp, Ben and Tan, Mingxing and Hwang, Jyh-Jing and Anguelov, Dragomir},
  title     = {{WOD-E2E}: {Waymo} Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3709--3718},
}
NoRD: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning: Ishaan Rawal,

Shubh Gupta,

Yihan Hu,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rawal_2026_CVPR,
  author    = {Rawal, Ishaan and Gupta, Shubh and Hu, Yihan and Zhan, Wei},
  title     = {{NoRD}: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10965--10975},
}
Variation-aware Vision Token Dropping for Faster Large Vision-Language Models: Junjie Chen,

Xuyang Liu,

Zichen Wen,

Yiyu Wang,

Siteng Huang,

Honggang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junjie and Liu, Xuyang and Wen, Zichen and Wang, Yiyu and Huang, Siteng and Chen, Honggang},
  title     = {Variation-aware Vision Token Dropping for Faster Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3489--3499},
}
Active Inference for Micro-Gesture Recognition: EFE-Guided Temporal Sampling and Adaptive Learning: Weijia Feng,

Jingyu Yang,

Ruojia Zhang,

Fengtao Sun,

Qian Gao,

Chenyang Wang,

Tongtong Su,

Jia Guo,

Xiaobai Li,

Minglai Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Weijia and Yang, Jingyu and Zhang, Ruojia and Sun, Fengtao and Gao, Qian and Wang, Chenyang and Su, Tongtong and Guo, Jia and Li, Xiaobai and Shao, Minglai},
  title     = {Active Inference for Micro-Gesture Recognition: {EFE}-Guided Temporal Sampling and Adaptive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13887--13896},
}
DGGT: Feedforward 4D Reconstruction of Dynamic Driving Scenes using Unposed Images: Xiaoxue Chen,

Ziyi Xiong,

Yuantao Chen,

Gen Li,

Nan Wang,

Hongcheng Luo,

Long Chen,

Haiyang Sun,

Bing Wang,

Guang Chen,

Hongyang Li,

Ya-Qin Zhang,

Hangjun Ye,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiaoxue and Xiong, Ziyi and Chen, Yuantao and Li, Gen and Wang, Nan and Luo, Hongcheng and Chen, Long and Sun, Haiyang and Wang, Bing and Chen, Guang and Li, Hongyang and Zhang, Ya-Qin and Ye, Hangjun and Zhao, Hao},
  title     = {{DGGT}: Feedforward {4D} Reconstruction of Dynamic Driving Scenes using Unposed Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1265--1276},
}
FlashCap: Millisecond-Accurate Human Motion Capture via Flashing LEDs and Event-Based Vision: Zekai Wu,

Shuqi Fan,

Mengyin Liu,

Yuhua Luo,

Xincheng Lin,

Ming Yan,

Junhao Wu,

Xiuhong Lin,

Yuexin Ma,

Chenglu Wen,

Lan Xu,

Siqi Shen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zekai and Fan, Shuqi and Liu, Mengyin and Luo, Yuhua and Lin, Xincheng and Yan, Ming and Wu, Junhao and Lin, Xiuhong and Ma, Yuexin and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng},
  title     = {{FlashCap}: Millisecond-Accurate Human Motion Capture via Flashing {LEDs} and Event-Based Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2221--2231},
}
E-RayZer: Self-supervised 3D Reconstruction as Spatial Visual Pre-training: Qitao Zhao,

Hao Tan,

Qianqian Wang,

Sai Bi,

Kai Zhang,

Kalyan Sunkavalli,

Shubham Tulsiani,

Hanwen Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Qitao and Tan, Hao and Wang, Qianqian and Bi, Sai and Zhang, Kai and Sunkavalli, Kalyan and Tulsiani, Shubham and Jiang, Hanwen},
  title     = {{E-RayZer}: Self-supervised {3D} Reconstruction as Spatial Visual Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7525--7535},
}
RPGFusion: 4D Radar Prior-Guided Multi-Modal Fusion for 3D Detection: Xin Qiu,

Wenjie Liu; [pdf]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xin and Liu, Wenjie},
  title     = {{RPGFusion}: {4D} Radar Prior-Guided Multi-Modal Fusion for {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {284--294},
}
Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts: Yang Liu,

Jiajin Zhang,

Yaojun Hu,

Bingguang Hao,

Xin Cao,

Yingda Xia,

Danyang Tu,

Shi Gu,

Ling Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Zhang, Jiajin and Hu, Yaojun and Hao, Bingguang and Cao, Xin and Xia, Yingda and Tu, Danyang and Gu, Shi and Zhang, Ling},
  title     = {Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9880--9890},
}
SparseWorld-TC: Trajectory-Conditioned Sparse Occupancy World Model: Jiayuan Du,

Yiming Zhao,

Zhenglong Guo,

Yong Pan,

Wenbo Hou,

Zhihui Hao,

Kun Zhan,

Qijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Jiayuan and Zhao, Yiming and Guo, Zhenglong and Pan, Yong and Hou, Wenbo and Hao, Zhihui and Zhan, Kun and Chen, Qijun},
  title     = {{SparseWorld-TC}: Trajectory-Conditioned Sparse Occupancy World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7425--7434},
}
Universal 3D Shape Matching via Coarse-to-Fine Language Guidance: Qinfeng Xiao,

Guofeng Mei,

Bo Yang,

Liying Zhang,

Jian Zhang,

Kit-lun Yick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Qinfeng and Mei, Guofeng and Yang, Bo and Zhang, Liying and Zhang, Jian and Yick, Kit-lun},
  title     = {Universal {3D} Shape Matching via Coarse-to-Fine Language Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13157--13167},
}
MRD: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding: Fan Yang,

Xingping Dong,

Xin Yu,

Wenhan Luo,

Wei Liu,

Kaihao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Fan and Dong, Xingping and Yu, Xin and Luo, Wenhan and Liu, Wei and Zhang, Kaihao},
  title     = {{MRD}: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2693--2703},
}
SplatSuRe: Selective Super-Resolution for Multi-view Consistent 3D Gaussian Splatting: Pranav Asthana,

Alex Hanson,

Allen Tu,

Tom Goldstein,

Matthias Zwicker,

Amitabh Varshney; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asthana_2026_CVPR,
  author    = {Asthana, Pranav and Hanson, Alex and Tu, Allen and Goldstein, Tom and Zwicker, Matthias and Varshney, Amitabh},
  title     = {{SplatSuRe}: Selective Super-Resolution for Multi-view Consistent {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11840--11849},
}
TAMER: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot ECG Diagnosis: Xuewei Zhou,

Yajie Meng,

Pan Zeng,

Xianfang Tang,

Feifei Cui,

Qiangguo Jin,

Jialiang Yang,

Junlin Xu; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xuewei and Meng, Yajie and Zeng, Pan and Tang, Xianfang and Cui, Feifei and Jin, Qiangguo and Yang, Jialiang and Xu, Junlin},
  title     = {{TAMER}: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot {ECG} Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10502--10511},
}
RunawayEvil: Jailbreaking the Image-to-Video Generative Models: Songping Wang,

Rufan Qian,

Yueming Lyu,

Qinglong Liu,

Linzhuang Zou,

Jie Qin,

Songhua Liu,

Caifeng Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Songping and Qian, Rufan and Lyu, Yueming and Liu, Qinglong and Zou, Linzhuang and Qin, Jie and Liu, Songhua and Shan, Caifeng},
  title     = {{RunawayEvil}: Jailbreaking the Image-to-Video Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9296--9305},
}
Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels: Juan Miguel Valverde,

Dim P. Papadopoulos,

Rasmus Larsen,

Anders Bjorholm Dahl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Valverde_2026_CVPR,
  author    = {Valverde, Juan Miguel and Papadopoulos, Dim P. and Larsen, Rasmus and Dahl, Anders Bjorholm},
  title     = {Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13123--13133},
}
InstantRetouch: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space: Jiarui Wu,

Yujin Wang,

Ruikang Li,

Fan Zhang,

Mingde Yao,

Tianfan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jiarui and Wang, Yujin and Li, Ruikang and Zhang, Fan and Yao, Mingde and Xue, Tianfan},
  title     = {{InstantRetouch}: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8216--8226},
}
When AVSR Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse: Yihuan Huang,

Jun Xue,

Liu Jiajun,

Daixian Li,

Tong Zhang,

Zhuolin Yi,

Yanzhen Ren,

Kai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yihuan and Xue, Jun and Jiajun, Liu and Li, Daixian and Zhang, Tong and Yi, Zhuolin and Ren, Yanzhen and Li, Kai},
  title     = {When {AVSR} Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4448--4457},
}
Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models: Dachuan Zhao,

Weiyue Li,

Zhenda Shen,

Yushu Qiu,

Bowen Xu,

Haoyu Chen,

Yongchao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Dachuan and Li, Weiyue and Shen, Zhenda and Qiu, Yushu and Xu, Bowen and Chen, Haoyu and Chen, Yongchao},
  title     = {Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10230--10240},
}
MetroGS: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes: Kehua Chen,

Tianlu Mao,

Xinzhu Ma,

Hao Jiang,

Zehao Li,

Zihan Liu,

Shuqin Gao,

Honglong Zhao,

Feng Dai,

Yucheng Zhang,

Zhaoqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Kehua and Mao, Tianlu and Ma, Xinzhu and Jiang, Hao and Li, Zehao and Liu, Zihan and Gao, Shuqin and Zhao, Honglong and Dai, Feng and Zhang, Yucheng and Wang, Zhaoqi},
  title     = {{MetroGS}: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {395--405},
}
PET-DINO: Unifying Visual Cues into Grounding DINO with Prompt-Enriched Training: Weifu Fu,

Jinyang Li,

Bin-Bin Gao,

Jialin Li,

Yuhuan Lin,

Hanqiu Deng,

Wenbing Tao,

Yong Liu,

Chengjie Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Weifu and Li, Jinyang and Gao, Bin-Bin and Li, Jialin and Lin, Yuhuan and Deng, Hanqiu and Tao, Wenbing and Liu, Yong and Wang, Chengjie},
  title     = {{PET-DINO}: Unifying Visual Cues into {Grounding DINO} with Prompt-Enriched Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13039--13048},
}
UniRefiner: Teaching Pre-trained ViTs to Self-Dispose Dross via Contrastive Register: Congpei Qiu,

Zhaoyu Hu,

Wei Ke,

Zhuotao Tian,

Yanhao Wu,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Congpei and Hu, Zhaoyu and Ke, Wei and Tian, Zhuotao and Wu, Yanhao and Zhang, Tong},
  title     = {{UniRefiner}: Teaching Pre-trained {ViTs} to Self-Dispose Dross via Contrastive Register},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10061--10070},
}
Elastic Weight Consolidation Done Right for Continual Learning: Xuan Liu,

Xiaobin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xuan and Chang, Xiaobin},
  title     = {Elastic Weight Consolidation Done Right for Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3930--3940},
}
ConsID-Gen: View-Consistent and Identity-Preserving Image-to-Video Generation: Mingyang Wu,

Ashirbad Mishra,

Soumik Dey,

Shuo Xing,

Naveen Ravipati,

Hansi Wu,

Binbin Li,

Zhengzhong Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mingyang and Mishra, Ashirbad and Dey, Soumik and Xing, Shuo and Ravipati, Naveen and Wu, Hansi and Li, Binbin and Tu, Zhengzhong},
  title     = {{ConsID-Gen}: View-Consistent and Identity-Preserving Image-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1853--1863}
}
SketchDeco: Training-Free Latent Composition for Precise Sketch Colourisation: Chaitat Utintu,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Utintu_2026_CVPR,
  author    = {Utintu, Chaitat and Song, Yi-Zhe},
  title     = {{SketchDeco}: Training-Free Latent Composition for Precise Sketch Colourisation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {484--494}
}
VIRAL: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation: Tairan He,

Zi Wang,

Haoru Xue,

Qingwei Ben,

Zhengyi Luo,

Wenli Xiao,

Ye Yuan,

Xingye Da,

Fernando Castañeda,

Shankar Sastry,

Changliu Liu,

Guanya Shi,

Linxi Fan,

Yuke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Tairan and Wang, Zi and Xue, Haoru and Ben, Qingwei and Luo, Zhengyi and Xiao, Wenli and Yuan, Ye and Da, Xingye and Casta{\~n}eda, Fernando and Sastry, Shankar and Liu, Changliu and Shi, Guanya and Fan, Linxi and Zhu, Yuke},
  title     = {{VIRAL}: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13430--13441}
}
PP-OCRv5: A Specialized 5M-Parameter Model Rivaling Billion-Parameter Vision-Language Models on OCR Tasks: Cheng Cui,

Yubo Zhang,

Ting Sun,

Xueqing Wang,

Hongen Liu,

Manhui Lin,

Yue Zhang,

Tingquan Gao,

Changda Zhou,

Jiaxuan Liu,

Zelun Zhang,

Jing Zhang,

Jun Zhang,

Yi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Cheng and Zhang, Yubo and Sun, Ting and Wang, Xueqing and Liu, Hongen and Lin, Manhui and Zhang, Yue and Gao, Tingquan and Zhou, Changda and Liu, Jiaxuan and Zhang, Zelun and Zhang, Jing and Zhang, Jun and Liu, Yi},
  title     = {{PP-OCRv5}: A Specialized {5M}-Parameter Model Rivaling Billion-Parameter Vision-Language Models on {OCR} Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2467--2476}
}
SO(3)-Equivariant ViT-Adapter for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation: Ziyan He,

Qiudan Zhang,

Lin Ma,

Xu Wang; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Ziyan and Zhang, Qiudan and Ma, Lin and Wang, Xu},
  title     = {{SO(3)}-Equivariant {ViT-Adapter} for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5740--5750}
}
ACoT-VLA: Action Chain-of-Thought for Vision-Language-Action Models: Linqing Zhong,

Yi Liu,

Yifei Wei,

Ziyu Xiong,

Si Liu,

Guanghui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Linqing and Liu, Yi and Wei, Yifei and Xiong, Ziyu and Liu, Si and Ren, Guanghui},
  title     = {{ACoT-VLA}: Action Chain-of-Thought for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8152--8162}
}
Fast SceneScript: Fast and Accurate Language-Based 3D Scene Understanding via Multi-Token Prediction: Ruihong Yin,

Xuepeng Shi,

Oleksandr Bailo,

Marco Manfredi,

Theo Gevers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Ruihong and Shi, Xuepeng and Bailo, Oleksandr and Manfredi, Marco and Gevers, Theo},
  title     = {Fast {SceneScript}: Fast and Accurate Language-Based {3D} Scene Understanding via Multi-Token Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2457--2466}
}
MSPT: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention: Pedro M. P. Curvo,

Jan-Willem van de Meent,

Maksim Zhdanov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Curvo_2026_CVPR,
  author    = {Curvo, Pedro M. P. and van de Meent, Jan-Willem and Zhdanov, Maksim},
  title     = {{MSPT}: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12924--12933}
}
Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification: Jian Yu,

Yujian Feng,

Shuai You,

Zhongkai Zhou,

Fei Wu,

Zhengjun Jing,

Yimu Ji; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Jian and Feng, Yujian and You, Shuai and Zhou, Zhongkai and Wu, Fei and Jing, Zhengjun and Ji, Yimu},
  title     = {Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4343--4352}
}
SARMAE: Masked Autoencoder for SAR Representation Learning: Danxu Liu,

Di Wang,

Hebaixu Wang,

Haoyang Chen,

Wentao Jiang,

Yilin Cheng,

Haonan Guo,

Wei Cui,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Danxu and Wang, Di and Wang, Hebaixu and Chen, Haoyang and Jiang, Wentao and Cheng, Yilin and Guo, Haonan and Cui, Wei and Zhang, Jing},
  title     = {{SARMAE}: Masked Autoencoder for {SAR} Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6496--6507}
}
CrossEarth-Gate: Fisher-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation: Shilei Cao,

Ziyang Gong,

Hehai Lin,

Yang Liu,

Jiashun Cheng,

Xiaoxing Hu,

Haoyuan Liang,

Guowen Li,

Chengwei Qin,

Hong Cheng,

Xue Yang,

Juepeng Zheng,

Haohuan Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Shilei and Gong, Ziyang and Lin, Hehai and Liu, Yang and Cheng, Jiashun and Hu, Xiaoxing and Liang, Haoyuan and Li, Guowen and Qin, Chengwei and Cheng, Hong and Yang, Xue and Zheng, Juepeng and Fu, Haohuan},
  title     = {{CrossEarth-Gate}: {Fisher}-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13222--13233}
}
VMonarch: Efficient Video Diffusion Transformers with Structured Attention: Cheng Liang,

Haoxian Chen,

Liang Hou,

Qi Fan,

Gangshan Wu,

Xin Tao,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Cheng and Chen, Haoxian and Hou, Liang and Fan, Qi and Wu, Gangshan and Tao, Xin and Wang, Limin},
  title     = {{VMonarch}: Efficient Video Diffusion Transformers with Structured Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4613--4623}
}
OddGridBench: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models: Tengjin Weng,

Wenhao Jiang,

Jingyi Wang,

Ming Li,

Lin Ma,

Zhong Ming; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2026_CVPR,
  author    = {Weng, Tengjin and Jiang, Wenhao and Wang, Jingyi and Li, Ming and Ma, Lin and Ming, Zhong},
  title     = {{OddGridBench}: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1575--1584}
}
SAIL: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning: Ye-Chan Kim,

SeungJu Cha,

Si-Woo Kim,

Minju Jeon,

Hyungee Kim,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Ye-Chan and Cha, SeungJu and Kim, Si-Woo and Jeon, Minju and Kim, Hyungee and Kim, Dong-Jin},
  title     = {{SAIL}: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3466--3475}
}
ReSAM: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images: Muhammad Naseer Subhani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Subhani_2026_CVPR,
  author    = {Subhani, Muhammad Naseer},
  title     = {{ReSAM}: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3805--3814}
}
Illuminating Visual Identity in Universal Multimodal Embeddings: Jiawei Cao,

Junyi Feng,

Jiashen Hua,

Ziheng Huang,

Bing Deng,

Kaijie Wu,

Chaochen Gu,

Jieping Ye; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jiawei and Feng, Junyi and Hua, Jiashen and Huang, Ziheng and Deng, Bing and Wu, Kaijie and Gu, Chaochen and Ye, Jieping},
  title     = {Illuminating Visual Identity in Universal Multimodal Embeddings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8737--8748}
}
SafeGRPO: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization: Xuankun Rong,

Wenke Huang,

Tingfeng Wang,

Daiguo Zhou,

Bo Du,

Mang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rong_2026_CVPR,
  author    = {Rong, Xuankun and Huang, Wenke and Wang, Tingfeng and Zhou, Daiguo and Du, Bo and Ye, Mang},
  title     = {{SafeGRPO}: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7901--7911}
}
Uncertainty-driven 3D Gaussian Splatting Active Mapping via Anisotropic Visibility Field: Shangjie Xue,

Jesse Dill,

Dhruv Ahuja,

Frank Dellaert,

Panagiotis Tsiotras,

Danfei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Shangjie and Dill, Jesse and Ahuja, Dhruv and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei},
  title     = {Uncertainty-driven {3D} {Gaussian} Splatting Active Mapping via Anisotropic Visibility Field},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5014--5026}
}
Radiance Meshes for Volumetric Reconstruction: Alexander Mai,

Trevor Hedstrom,

George Kopanas,

Janne Kontkanen,

Falko Kuester,

Jonathan T. Barron; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Alexander and Hedstrom, Trevor and Kopanas, George and Kontkanen, Janne and Kuester, Falko and Barron, Jonathan T.},
  title     = {Radiance Meshes for Volumetric Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8267--8277}
}
Unified Primitive Proxies for Structured Shape Completion: Zhaiyu Chen,

Yuqing Wang,

Xiao Xiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhaiyu and Wang, Yuqing and Zhu, Xiao Xiang},
  title     = {Unified Primitive Proxies for Structured Shape Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7457--7467}
}
TeFlow: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation: Qingwen Zhang,

Chenhan Jiang,

Xiaomeng Zhu,

Yunqi Miao,

Yushan Zhang,

Olov Andersson,

Patric Jensfelt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qingwen and Jiang, Chenhan and Zhu, Xiaomeng and Miao, Yunqi and Zhang, Yushan and Andersson, Olov and Jensfelt, Patric},
  title     = {{TeFlow}: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3667--3676}
}
Batch Loss Score for Dynamic Data Pruning: Qing Zhou,

Bingxuan Zhao,

Tao Yang,

Hongyuan Zhang,

Junyu Gao,

Qi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qing and Zhao, Bingxuan and Yang, Tao and Zhang, Hongyuan and Gao, Junyu and Wang, Qi},
  title     = {Batch Loss Score for Dynamic Data Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6188--6197}
}
Spatio-Temporal Conditional Denoising Transformer for Modality-Missing RGBT Tracking: Andong Lu,

Ziyi Zha,

Jiandong Jin,

Shihao Li,

Chenglong Li,

Jin Tang,

Bin Luo; [pdf]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Andong and Zha, Ziyi and Jin, Jiandong and Li, Shihao and Li, Chenglong and Tang, Jin and Luo, Bin},
  title     = {Spatio-Temporal Conditional Denoising Transformer for Modality-Missing {RGBT} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13584--13593}
}
SignPR: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production: Xiao Liu,

Shiwei Gan,

Yafeng Yin,

Bowen Guo,

Zhiwei Jiang,

Shunmei Meng,

Lei Xie,

Sanglu Lu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiao and Gan, Shiwei and Yin, Yafeng and Guo, Bowen and Jiang, Zhiwei and Meng, Shunmei and Xie, Lei and Lu, Sanglu},
  title     = {{SignPR}: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2198--2208}
}
PureProof: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models: Yiming Cao,

Dong Wang,

Xinqi Lyu,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yiming and Wang, Dong and Lyu, Xinqi and Xiao, Bin},
  title     = {{PureProof}: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8621--8630}
}
LaS-Comp: Zero-shot 3D Completion with Latent-Spatial Consistency: Weilong Yan,

Haipeng Li,

Hao Xu,

Nianjin Ye,

Yihao Ai,

Shuaicheng Liu,

Jingyu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Weilong and Li, Haipeng and Xu, Hao and Ye, Nianjin and Ai, Yihao and Liu, Shuaicheng and Hu, Jingyu},
  title     = {{LaS-Comp}: Zero-shot {3D} Completion with Latent-Spatial Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7588--7599}
}
EchoVDiff: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame: Jiansong Zhang,

Xiaying Yang,

Xiaoling Luo,

Linlin Shen; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiansong and Yang, Xiaying and Luo, Xiaoling and Shen, Linlin},
  title     = {{EchoVDiff}: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9040--9050}
}
Consensus Entropy: Harnessing Multi-VLM Agreement for Self-Verifying and Self-Improving OCR: Yulong Zhang,

Tianyi Liang,

Erfei Cui,

Guoqing Wang,

Xu Guo,

Chenhui Li,

Gongshen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yulong and Liang, Tianyi and Cui, Erfei and Wang, Guoqing and Guo, Xu and Li, Chenhui and Liu, Gongshen},
  title     = {Consensus Entropy: Harnessing Multi-{VLM} Agreement for Self-Verifying and Self-Improving {OCR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11643--11653}
}
AIMDepth: Asymmetric Image-Event Mamba for Monocular Depth Estimation: Luoxi Jing,

Dianxi Shi,

Yushe Cao,

Yuanze Wang,

Junze Zhang,

Yuning Cui,

Mengzhu Wang; [pdf]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Luoxi and Shi, Dianxi and Cao, Yushe and Wang, Yuanze and Zhang, Junze and Cui, Yuning and Wang, Mengzhu},
  title     = {{AIMDepth}: Asymmetric Image-Event {Mamba} for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8033--8044}
}
Interpretable and Steerable Concept Bottleneck Sparse Autoencoders: Akshay Kulkarni,

Tsui-Wei Weng,

Vivek Narayanaswamy,

Shusen Liu,

Wesam A. Sakla,

Kowshik Thopalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2026_CVPR,
  author    = {Kulkarni, Akshay and Weng, Tsui-Wei and Narayanaswamy, Vivek and Liu, Shusen and Sakla, Wesam A. and Thopalli, Kowshik},
  title     = {Interpretable and Steerable Concept Bottleneck Sparse Autoencoders},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2918--2927}
}
Token Warping Helps MLLMs Look from Nearby Viewpoints: Phillip Y. Lee,

Chanho Park,

Mingue Park,

Seungwoo Yoo,

Juil Koo,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Phillip Y. and Park, Chanho and Park, Mingue and Yoo, Seungwoo and Koo, Juil and Sung, Minhyuk},
  title     = {Token Warping Helps {MLLMs} Look from Nearby Viewpoints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3476--3488}
}
Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models: Enguang Wang,

Qiang Wang,

Yuanchen Wu,

Ke Yan,

Xinbin Yuan,

Shouhong Ding,

Xialei Liu,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Enguang and Wang, Qiang and Wu, Yuanchen and Yan, Ke and Yuan, Xinbin and Ding, Shouhong and Liu, Xialei and Cheng, Ming-Ming},
  title     = {Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8813--8824}
}
MakeAnything: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation: Yiren Song,

Cheng Liu,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Yiren and Liu, Cheng and Shou, Mike Zheng},
  title     = {{MakeAnything}: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11482--11492}
}
Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping: Sunghyun Park,

Jeongho Kim,

Hyoungwoo Park,

Debasmit Das,

Sungrack Yun,

Munawar Hayat,

Jaegul Choo,

Fatih Porikli,

Seokeon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Sunghyun and Kim, Jeongho and Park, Hyoungwoo and Das, Debasmit and Yun, Sungrack and Hayat, Munawar and Choo, Jaegul and Porikli, Fatih and Choi, Seokeon},
  title     = {Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11504--11514}
}
pH-Strips for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning: Chengyao Qian,

Jing Wu,

Trung Le,

Dinh Phung,

Mehrtash Harandi; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Chengyao and Wu, Jing and Le, Trung and Phung, Dinh and Harandi, Mehrtash},
  title     = {{pH-Strips} for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3306--3315}
}
COPE: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification: Siyi Sun,

Jinliang Lin,

Juanjuan Weng,

Zhihui Liu,

Shaozi Li,

Zhiming Luo; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Siyi and Lin, Jinliang and Weng, Juanjuan and Liu, Zhihui and Li, Shaozi and Luo, Zhiming},
  title     = {{COPE}: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11293--11302}
}
ReflexSplit: Single Image Reflection Separation via Layer Fusion-Separation: Chia-Ming Lee,

Yu-Fan Lin,

Jin-Hui Jiang,

Yu-Jou Hsiao,

Chih-Chung Hsu,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chia-Ming and Lin, Yu-Fan and Jiang, Jin-Hui and Hsiao, Yu-Jou and Hsu, Chih-Chung and Liu, Yu-Lun},
  title     = {{ReflexSplit}: Single Image Reflection Separation via Layer Fusion-Separation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1300--1309}
}
Shedding Light on VLN Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack: Chenyang Li,

Wenbing Tang,

Yihao Huang,

Sinong Simon Zhan,

Ming Hu,

Xiaojun Jia,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chenyang and Tang, Wenbing and Huang, Yihao and Zhan, Sinong Simon and Hu, Ming and Jia, Xiaojun and Liu, Yang},
  title     = {Shedding Light on {VLN} Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1565--1574}
}
Geometry-as-context: Modulating Explicit 3D in Scene-consistent Video Generation to Geometry Context: JiaKui Hu,

Jialun Liu,

Liying Yang,

Xinliang Zhang,

Kaiwen Li,

Shuang Zeng,

Yuanwei Li,

Haibin Huang,

Chi Zhang,

Yanye Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, JiaKui and Liu, Jialun and Yang, Liying and Zhang, Xinliang and Li, Kaiwen and Zeng, Shuang and Li, Yuanwei and Huang, Haibin and Zhang, Chi and Lu, Yanye},
  title     = {Geometry-as-context: Modulating Explicit {3D} in Scene-consistent Video Generation to Geometry Context},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4258--4268}
}
PAUL: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence: Zheng Li,

Xueyi Zhang,

Yanming Guo,

Yuxiang Xie,

Zhaoyun Ding,

Siqi Cai,

Haizhou Li,

Mingrui Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zheng and Zhang, Xueyi and Guo, Yanming and Xie, Yuxiang and Ding, Zhaoyun and Cai, Siqi and Li, Haizhou and Lao, Mingrui},
  title     = {{PAUL}: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5389--5398}
}
LightMover: Generative Light Movement with Color and Intensity Controls: Gengze Zhou,

Tianyu Wang,

Soo Ye Kim,

Zhixin Shu,

Xin Yu,

Yannick Hold-Geoffroy,

Sumit Chaturvedi,

Qi Wu,

Zhe Lin,

Scott Cohen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Gengze and Wang, Tianyu and Kim, Soo Ye and Shu, Zhixin and Yu, Xin and Hold-Geoffroy, Yannick and Chaturvedi, Sumit and Wu, Qi and Lin, Zhe and Cohen, Scott},
  title     = {{LightMover}: Generative Light Movement with Color and Intensity Controls},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8997--9007}
}
InfinityHuman: Towards Long-Term Audio-Driven Human Animation: Xiaodi Li,

Pan Xie,

Yi Ren,

Qijun Gan,

Chen Zhang,

Fangyuan Kong,

Xiang Yin,

Zehuan Yuan,

Bingyue Peng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaodi and Xie, Pan and Ren, Yi and Gan, Qijun and Zhang, Chen and Kong, Fangyuan and Yin, Xiang and Yuan, Zehuan and Peng, Bingyue},
  title     = {{InfinityHuman}: Towards Long-Term Audio-Driven Human Animation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3978--3987}
}
VoDaSuRe: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution: August Leander Høeg,

Sophia Wiinberg Bardenfleth,

Hans Martin Kjer,

Tim Bjørn Dyrby,

Vedrana Andersen Dahl,

Anders Bjorholm Dahl; [pdf] [supp]
[bibtex]
@InProceedings{Hoeg_2026_CVPR,
  author    = {H{\o}eg, August Leander and Bardenfleth, Sophia Wiinberg and Kjer, Hans Martin and Dyrby, Tim Bj{\o}rn and Dahl, Vedrana Andersen and Dahl, Anders Bjorholm},
  title     = {{VoDaSuRe}: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2166--2176}
}
TokenHand: Discrete Token Representation for Efficient Hand Mesh Reconstruction: Xinguo He,

Yixin Shen,

Rahul Chaudhari; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xinguo and Shen, Yixin and Chaudhari, Rahul},
  title     = {{TokenHand}: Discrete Token Representation for Efficient Hand Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8921--8931}
}
VAR RL Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation: Shikun Sun,

Liao Qu,

Huichao Zhang,

Yiheng Liu,

Yangyang Song,

Xian Li,

Yi Jiang,

Xu Wang,

Jia Jia,

Daniel K. Du,

Xinglong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Shikun and Qu, Liao and Zhang, Huichao and Liu, Yiheng and Song, Yangyang and Li, Xian and Jiang, Yi and Wang, Xu and Jia, Jia and Du, Daniel K. and Wu, Xinglong},
  title     = {{VAR} {RL} Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1874--1884}
}
Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective: Junjie Zhou,

Bao Xue,

Meiling Wang,

Wei Shao,

Daoqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Junjie and Xue, Bao and Wang, Meiling and Shao, Wei and Zhang, Daoqiang},
  title     = {Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12554--12564}
}
Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation: Tim Engelbracht,

René Zurbrügg,

Matteo Wohlrapp,

Martin Büchner,

Abhinav Valada,

Marc Pollefeys,

Hermann Blum,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Engelbracht_2026_CVPR,
  author    = {Engelbracht, Tim and Zurbr{\"u}gg, Ren{\'e} and Wohlrapp, Matteo and B{\"u}chner, Martin and Valada, Abhinav and Pollefeys, Marc and Blum, Hermann and Bauer, Zuria},
  title     = {Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8880--8890}
}
MultiBanana: A Challenging Benchmark for Multi-Reference Text-to-Image Generation: Yuta Oshima,

Daiki Miyake,

Kohsei Matsutani,

Yusuke Iwasawa,

Masahiro Suzuki,

Yutaka Matsuo,

Hiroki Furuta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oshima_2026_CVPR,
  author    = {Oshima, Yuta and Miyake, Daiki and Matsutani, Kohsei and Iwasawa, Yusuke and Suzuki, Masahiro and Matsuo, Yutaka and Furuta, Hiroki},
  title     = {{MultiBanana}: A Challenging Benchmark for Multi-Reference Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {448--460}
}
Distilling Unsigned Distance Function for Surface Reconstruction from 3D Gaussian Splatting: Qian Li,

Rao Fu,

Jiangtao Li,

Fan Liu; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qian and Fu, Rao and Li, Jiangtao and Liu, Fan},
  title     = {Distilling Unsigned Distance Function for Surface Reconstruction from {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4891--4901}
}
TouchDream: 3D Object Completion through Imagined Touch: Yuanbo Wang,

Xinning Wang,

Zhaoxuan Zhang,

Changlong Wang,

Qianchen Xia,

Xiaopeng Wei,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanbo and Wang, Xinning and Zhang, Zhaoxuan and Wang, Changlong and Xia, Qianchen and Wei, Xiaopeng and Yang, Xin},
  title     = {{TouchDream}: {3D} Object Completion through Imagined Touch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8901--8910}
}
Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization: Inha Kang,

Eunki Kim,

Wonjeong Ryu,

Jaeyo Shin,

Seungjun Yu,

Yoon-Hee Kang,

Seongeun Jeong,

Eunhye Kim,

Soontae Kim,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Inha and Kim, Eunki and Ryu, Wonjeong and Shin, Jaeyo and Yu, Seungjun and Kang, Yoon-Hee and Jeong, Seongeun and Kim, Eunhye and Kim, Soontae and Shim, Hyunjung},
  title     = {Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6421--6431}
}
Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits: Zelong Sun,

Jiahui Wu,

Ying Ba,

Dong Jing,

Zhiwu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zelong and Wu, Jiahui and Ba, Ying and Jing, Dong and Lu, Zhiwu},
  title     = {Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7827--7836}
}
FedSST: Rethinking Fair Federated Graph Learning under Structural Shift: Dingyi Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Dingyi},
  title     = {{FedSST}: Rethinking Fair Federated Graph Learning under Structural Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10335--10345}
}
Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight MLLMs: Jingze Wu,

Quan Zhang,

Hongfei Suo,

Zeqiang Cai,

Hongbo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jingze and Zhang, Quan and Suo, Hongfei and Cai, Zeqiang and Chen, Hongbo},
  title     = {Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12259--12268}
}
Scalable Trajectory Generation for Whole-Body Mobile Manipulation: Yida Niu,

Xinhai Chang,

Xin Liu,

Ziyuan Jiao,

Yixin Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Yida and Chang, Xinhai and Liu, Xin and Jiao, Ziyuan and Zhu, Yixin},
  title     = {Scalable Trajectory Generation for Whole-Body Mobile Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1798--1808}
}
UniDef: Universal Defense Against Unauthorized Image Manipulation: Mingwen Shao,

Lingzhuang Meng,

Xiang Lv,

Mengyao Wu,

Xinyuan Chen,

Qiao Zhang,

Chang Liu,

Yuanjian Qiao,

Chao Dong; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Mingwen and Meng, Lingzhuang and Lv, Xiang and Wu, Mengyao and Chen, Xinyuan and Zhang, Qiao and Liu, Chang and Qiao, Yuanjian and Dong, Chao},
  title     = {{UniDef}: Universal Defense Against Unauthorized Image Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8631--8640}
}
CLCR: Cross-Level Semantic Collaborative Representation for Multimodal Learning: Chunlei Meng,

Guanhong Huang,

Rong Fu,

Runmin Jian,

Zhongxue Gan,

Chun Ouyang; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Chunlei and Huang, Guanhong and Fu, Rong and Jian, Runmin and Gan, Zhongxue and Ouyang, Chun},
  title     = {{CLCR}: Cross-Level Semantic Collaborative Representation for Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1606--1615}
}
Optical Diffraction-based Convolution for Semiconductor Lithography: Young-Han Son,

Dong-Hee Shin,

Deok-Joong Lee,

Hyun Jung Lee,

Tae-Eui Kam; [pdf] [supp]
[bibtex]
@InProceedings{Son_2026_CVPR,
  author    = {Son, Young-Han and Shin, Dong-Hee and Lee, Deok-Joong and Lee, Hyun Jung and Kam, Tae-Eui},
  title     = {Optical Diffraction-based Convolution for Semiconductor Lithography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12458--12468}
}
GSNR: Graph Smooth Null-Space Representation for Inverse Problems: Romario Gualdrón-Hurtado,

Roman Jacome,

Rafael S. Suárez,

Henry Arguello; [pdf] [supp]
[bibtex]
@InProceedings{Gualdron-Hurtado_2026_CVPR,
  author    = {Gualdr{\'o}n-Hurtado, Romario and Jacome, Roman and Su{\'a}rez, Rafael S. and Arguello, Henry},
  title     = {{GSNR}: Graph Smooth Null-Space Representation for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12469--12479}
}
U-Mind: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation: Xiang Deng,

Feng Gao,

Yong Zhang,

Youxin Pang,

Xu Xiaoming,

Zhuoliang Kang,

Xiaoming Wei,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiang and Gao, Feng and Zhang, Yong and Pang, Youxin and Xiaoming, Xu and Kang, Zhuoliang and Wei, Xiaoming and Liu, Yebin},
  title     = {{U-Mind}: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10874--10886}
}
Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation: Haonan Cai,

Yuxuan Luo,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Haonan and Luo, Yuxuan and Lian, Zhouhui},
  title     = {Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {517--528}
}
eRetinexGS: Retinex Modeling for Low-Light Scene Enhancement via Event Streams and 3D Gaussian Splatting: Haojie Yan,

Zehao Chen,

Yan Liu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haojie and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {{eRetinexGS}: {Retinex} Modeling for Low-Light Scene Enhancement via Event Streams and {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8056--8066}
}
WebGym: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks: Hao Bai,

Alexey Taymanov,

Tong Zhang,

Aviral Kumar,

Spencer Whitehead; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Hao and Taymanov, Alexey and Zhang, Tong and Kumar, Aviral and Whitehead, Spencer},
  title     = {{WebGym}: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12248--12258}
}
Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation: Ting Yang,

Qilong Wang,

Qibin Hou,

Qinghua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ting and Wang, Qilong and Hou, Qibin and Hu, Qinghua},
  title     = {Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10699--10709}
}
The Midas Touch for Metric Depth: Yu Ma,

Zizhan Guo,

Zuyi Xiong,

Haoran Zhang,

Yi Feng,

Hongbo Zhao,

Hanli Wang,

Rui Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yu and Guo, Zizhan and Xiong, Zuyi and Zhang, Haoran and Feng, Yi and Zhao, Hongbo and Wang, Hanli and Fan, Rui},
  title     = {The {Midas} Touch for Metric Depth},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5804--5813}
}
Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning: Zhengjian Yao,

Yongzhi Li,

Xinyuan Gao,

Quan Chen,

Peng Jiang,

Yanye Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhengjian and Li, Yongzhi and Gao, Xinyuan and Chen, Quan and Jiang, Peng and Lu, Yanye},
  title     = {{Narrative Weaver}: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7707--7718}
}
Dual-branch Distilled Transformer for Efficient Asymmetric UAV Tracking: Hongtao Yang,

Bineng Zhong,

Qihua Liang,

Yaozong Zheng,

Xiantao Hu,

Yuanliang Xue,

Shuxiang Song; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Hongtao and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Hu, Xiantao and Xue, Yuanliang and Song, Shuxiang},
  title     = {Dual-branch Distilled Transformer for Efficient Asymmetric {UAV} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13615--13625}
}
DiffSoup: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification: Kenji Tojo,

Bernd Bickel,

Nobuyuki Umetani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tojo_2026_CVPR,
  author    = {Tojo, Kenji and Bickel, Bernd and Umetani, Nobuyuki},
  title     = {{DiffSoup}: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8353--8363}
}
Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation: Yongjie Bai,

Zhouxia Wang,

Yang Liu,

Kaijun Luo,

Yifan Wen,

Mingtong Dai,

Weixing Chen,

Ziliang Chen,

Lingbo Liu,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Yongjie and Wang, Zhouxia and Liu, Yang and Luo, Kaijun and Wen, Yifan and Dai, Mingtong and Chen, Weixing and Chen, Ziliang and Liu, Lingbo and Li, Guanbin and Lin, Liang},
  title     = {Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13386--13396}
}
Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models: Mark Endo,

Serena Yeung-Levy; [pdf] [supp]
[bibtex]
@InProceedings{Endo_2026_CVPR,
  author    = {Endo, Mark and Yeung-Levy, Serena},
  title     = {Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {778--788}
}
FlexiVideo: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding: Da Peng,

Xuesong Yang,

Zonghao Guo,

Yichen Zhang,

Chi Chen,

Yidan Zhang,

Yuan Yao,

Fang Wan,

Wei Ke,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Da and Yang, Xuesong and Guo, Zonghao and Zhang, Yichen and Chen, Chi and Zhang, Yidan and Yao, Yuan and Wan, Fang and Ke, Wei and Sun, Maosong},
  title     = {{FlexiVideo}: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9804--9814}
}
Vinedresser3D: Towards Agentic Text-guided 3D Editing: Yankuan Chi,

Xiang Li,

Zixuan Huang,

James Matthew Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Chi_2026_CVPR,
  author    = {Chi, Yankuan and Li, Xiang and Huang, Zixuan and Rehg, James Matthew},
  title     = {{Vinedresser3D}: Towards Agentic Text-guided {3D} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12673--12683}
}
VideoMaMa: Mask-Guided Video Matting via Generative Prior: Sangbeom Lim,

Seoung Wug Oh,

Jiahui Huang,

Heeji Yoon,

Seungryong Kim,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Sangbeom and Oh, Seoung Wug and Huang, Jiahui and Yoon, Heeji and Kim, Seungryong and Lee, Joon-Young},
  title     = {{VideoMaMa}: Mask-Guided Video Matting via Generative Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3845--3855}
}
WebChain: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces: Sicheng Fan,

Rui Wan,

Yifei Leng,

Gaoning Liang,

Li Ling,

Yanyi Shang,

Dehan Kong; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Sicheng and Wan, Rui and Leng, Yifei and Liang, Gaoning and Ling, Li and Shang, Yanyi and Kong, Dehan},
  title     = {{WebChain}: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6209--6218}
}
EvoComp: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling: Jiafei Song,

Fengwei Zhou,

Jin Qu,

Wenjin Jason Li,

Tong Wu,

Gengjian Xue,

Zhikang Zhao,

Daomin Wei,

Yichao Lu,

Bailin Na; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Jiafei and Zhou, Fengwei and Qu, Jin and Li, Wenjin Jason and Wu, Tong and Xue, Gengjian and Zhao, Zhikang and Wei, Daomin and Lu, Yichao and Na, Bailin},
  title     = {{EvoComp}: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3532--3542}
}
The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation: Guannan Lai,

Da-Wei Zhou,

Zhenguo Li,

Han-Jia Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Guannan and Zhou, Da-Wei and Li, Zhenguo and Ye, Han-Jia},
  title     = {The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3866--3875}
}
Long-SCOPE: Fully Sparse Long-Range Cooperative 3D Perception: Jiahao Wang,

Zikun Xu,

Yuner Zhang,

Zhongwei Jiang,

Chenyang Lu,

Shuocheng Yang,

Yuxuan Wang,

Jiaru Zhong,

Chuang Zhang,

Shaobing Xu,

Jianqiang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Xu, Zikun and Zhang, Yuner and Jiang, Zhongwei and Lu, Chenyang and Yang, Shuocheng and Wang, Yuxuan and Zhong, Jiaru and Zhang, Chuang and Xu, Shaobing and Wang, Jianqiang},
  title     = {{Long-SCOPE}: Fully Sparse Long-Range Cooperative {3D} Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11599--11609}
}
Momentum Memory for Knowledge Distillation in Computational Pathology: Yongxin Guo,

Hao Lu,

Onur C. Koyun,

Zhengjie Zhu,

Muhammet F. Demir,

Metin N. Gurcan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yongxin and Lu, Hao and Koyun, Onur C. and Zhu, Zhengjie and Demir, Muhammet F. and Gurcan, Metin N.},
  title     = {Momentum Memory for Knowledge Distillation in Computational Pathology},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6889--6899}
}
Efficiently Reconstructing Dynamic Scenes One D4RT at a Time: Chuhan Zhang,

Guillaume Le Moing,

Skanda Koppula,

Ignacio Rocco,

Liliane Momeni,

Junyu Xie,

Shuyang Sun,

Rahul Sukthankar,

Joëlle K. Barral,

Raia Hadsell,

Zoubin Ghahramani,

Andrew Zisserman,

Junlin Zhang,

Mehdi S. M. Sajjadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chuhan and Le Moing, Guillaume and Koppula, Skanda and Rocco, Ignacio and Momeni, Liliane and Xie, Junyu and Sun, Shuyang and Sukthankar, Rahul and Barral, Jo{\"e}lle K. and Hadsell, Raia and Ghahramani, Zoubin and Zisserman, Andrew and Zhang, Junlin and Sajjadi, Mehdi S. M.},
  title     = {Efficiently Reconstructing Dynamic Scenes One {D4RT} at a Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7382--7392}
}
Ref4D-VideoBench: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models: Jiajia Wei,

Yujia He,

Yuhan Hou,

Hang Qi,

Sihua Wang,

Jincheng Shi,

Kwok Fung Li,

Zibin Zheng,

Weibin Wu; [pdf]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jiajia and He, Yujia and Hou, Yuhan and Qi, Hang and Wang, Sihua and Shi, Jincheng and Li, Kwok Fung and Zheng, Zibin and Wu, Weibin},
  title     = {{Ref4D-VideoBench}: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7719--7729}
}
TeHOR: Text-Guided 3D Human and Object Reconstruction with Textures: Hyeongjin Nam,

Daniel Sungho Jung,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2026_CVPR,
  author    = {Nam, Hyeongjin and Jung, Daniel Sungho and Lee, Kyoung Mu},
  title     = {{TeHOR}: Text-Guided {3D} Human and Object Reconstruction with Textures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7100--7110}
}
Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation: Yajun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajun},
  title     = {Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8503--8513}
}
Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models: Haoyan Wu,

Yahao Liu,

Yinjie Lei,

Lixin Duan,

Wen Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haoyan and Liu, Yahao and Lei, Yinjie and Duan, Lixin and Li, Wen},
  title     = {Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3143--3153}
}
CARE What Fails: Contrastive Anchored-REflection for Verifiable Multimodal Reasoning: Yongxin Wang,

Zhicheng Yang,

Meng Cao,

Mingfei Han,

Haokun Lin,

Yingying Zhu,

Xiaojun Chang,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yongxin and Yang, Zhicheng and Cao, Meng and Han, Mingfei and Lin, Haokun and Zhu, Yingying and Chang, Xiaojun and Liang, Xiaodan},
  title     = {{CARE} What Fails: Contrastive {Anchored-REflection} for Verifiable Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11976--11986}
}
Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency: Yi Liu,

Yi Wan,

Lei Yu,

Panwang Xia,

Qiong Wu,

Yingying Pei,

Xuejun Huang,

Junjian Zhang,

Xiangyuan Cai,

Hongwei Hu,

Yongjun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yi and Wan, Yi and Yu, Lei and Xia, Panwang and Wu, Qiong and Pei, Yingying and Huang, Xuejun and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Zhang, Yongjun},
  title     = {Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6443--6452}
}
Extend3D: Town-Scale 3D Generation: Seungwoo Yoon,

Jinmo Kim,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Seungwoo and Kim, Jinmo and Park, Jaesik},
  title     = {{Extend3D}: Town-Scale {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5892--5901}
}
A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space: Huijie Liu,

Shuhao Cui,

Haoxiang Cao,

Shuai Ma,

Kai Wu,

Guoliang Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Huijie and Cui, Shuhao and Cao, Haoxiang and Ma, Shuai and Wu, Kai and Kang, Guoliang},
  title     = {A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1--10}
}
Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions: Shiqin Wang,

Haoyang Chen,

Huaizhou Huang,

Yinkan He,

Dongfang Sun,

Xiaoqing Chen,

Xingyu Liu,

Zheng Wang,

Kaiyan Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shiqin and Chen, Haoyang and Huang, Huaizhou and He, Yinkan and Sun, Dongfang and Chen, Xiaoqing and Liu, Xingyu and Wang, Zheng and Zhao, Kaiyan},
  title     = {Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3815--3824}
}
GeoPredict: Leveraging Predictive Kinematics and 3D Gaussian Geometry for Precise VLA Manipulation: Jingjing Qian,

Boyao Han,

Chen Shi,

Lei Xiao,

Long Yang,

Shaoshuai Shi,

Li Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Jingjing and Han, Boyao and Shi, Chen and Xiao, Lei and Yang, Long and Shi, Shaoshuai and Jiang, Li},
  title     = {{GeoPredict}: Leveraging Predictive Kinematics and {3D} {Gaussian} Geometry for Precise {VLA} Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13529--13539}
}
PoseGAM: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning: Jianqi Chen,

Biao Zhang,

Xiangjun Tang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter},
  title     = {{PoseGAM}: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7197--7208}
}
Event-based Motion Deblurring with Unpaired Data: Hoonhee Cho,

Yuhwan Jeong,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Hoonhee and Jeong, Yuhwan and Yoon, Kuk-Jin},
  title     = {Event-based Motion Deblurring with Unpaired Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {882--891}
}
DualMirage: Hunting Stealthy Multimodal LLM Agents via CAPTCHAs with Contour and Adversarial Illusions: Bei Chen,

Gaolei Li,

Jun Wu,

Jianhua Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Bei and Li, Gaolei and Wu, Jun and Li, Jianhua},
  title     = {{DualMirage}: Hunting Stealthy Multimodal {LLM} Agents via {CAPTCHAs} with Contour and Adversarial Illusions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1523--1532}
}
VGGT-Det: Mining VGGT Internal Priors for Sensor-Geometry-Free Multi-View Indoor 3D Object Detection: Yang Cao,

Feize Wu,

Dave Zhenyu Chen,

Yingji Zhong,

Lanqing Hong,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yang and Wu, Feize and Chen, Dave Zhenyu and Zhong, Yingji and Hong, Lanqing and Xu, Dan},
  title     = {{VGGT-Det}: Mining {VGGT} Internal Priors for Sensor-Geometry-Free Multi-View Indoor {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4708--4717}
}
Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in MLLMs: Jing Yang,

Sen Yang,

Boqiang Duan,

Ming Dai,

Wei Zhang,

Xiao Tan,

Kunbin Chen,

Wei He,

Jingdong Wang,

Hanli Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jing and Yang, Sen and Duan, Boqiang and Dai, Ming and Zhang, Wei and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Wang, Hanli},
  title     = {Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5175--5186}
}
UniPR: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair: Chuanrui Zhang,

Yingshuang Zou,

ZhengXian Wu,

Yonggen Ling,

Yuxiao Yang,

Ziwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chuanrui and Zou, Yingshuang and Wu, ZhengXian and Ling, Yonggen and Yang, Yuxiao and Wang, Ziwei},
  title     = {{UniPR}: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4667--4676}
}
DiffDecompose: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers: Zitong Wang,

Hang Zhao,

Qianyu Zhou,

Xuequan Lu,

Xiangtai Li,

Hao Yang,

Bo Yang,

Yiren Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zitong and Zhao, Hang and Zhou, Qianyu and Lu, Xuequan and Li, Xiangtai and Yang, Hao and Yang, Bo and Song, Yiren},
  title     = {{DiffDecompose}: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4624--4634}
}
ERMoE: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization: Anzhe Cheng,

Shukai Duan,

Shixuan Li,

Chenzhong Yin,

Mingxi Cheng,

Heng Ping,

Tamoghna Chattopadhyay,

Sophia I. Thomopoulos,

Shahin Nazarian,

Paul Thompson,

Paul Bogdan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Anzhe and Duan, Shukai and Li, Shixuan and Yin, Chenzhong and Cheng, Mingxi and Ping, Heng and Chattopadhyay, Tamoghna and Thomopoulos, Sophia I. and Nazarian, Shahin and Thompson, Paul and Bogdan, Paul},
  title     = {{ERMoE}: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12997--13006}
}
Counterfactual VLA: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning: Zhenghao Peng,

Wenhao Ding,

Yurong You,

Yuxiao Chen,

Wenjie Luo,

Thomas Tian,

Yulong Cao,

Apoorva Sharma,

Danfei Xu,

Boris Ivanovic,

Boyi Li,

Yan Wang,

Marco Pavone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Zhenghao and Ding, Wenhao and You, Yurong and Chen, Yuxiao and Luo, Wenjie and Tian, Thomas and Cao, Yulong and Sharma, Apoorva and Xu, Danfei and Ivanovic, Boris and Li, Boyi and Wang, Yan and Pavone, Marco},
  title     = {Counterfactual {VLA}: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4022--4031}
}
Dynamic Momentum Recalibration in Online Gradient Learning: Zhipeng Yao,

Rui Yu,

Guisong Chang,

Ying Li,

Yu Zhang,

Dazhou Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhipeng and Yu, Rui and Chang, Guisong and Li, Ying and Zhang, Yu and Li, Dazhou},
  title     = {Dynamic Momentum Recalibration in Online Gradient Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12902--12912}
}
AG-VAS: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models: Zhen Qu,

Xian Tao,

Xiaoyi Bao,

Dingrong Wang,

ShiChen Qu,

Zhengtao Zhang,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Zhen and Tao, Xian and Bao, Xiaoyi and Wang, Dingrong and Qu, ShiChen and Zhang, Zhengtao and Wang, Xingang},
  title     = {{AG-VAS}: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14126--14136}
}
Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer: Haoru Xue,

Tairan He,

Zi Wang,

Qingwei Ben,

Wenli Xiao,

Zhengyi Luo,

Xingye Da,

Fernando Castañeda,

Guanya Shi,

Shankar Sastry,

Linxi Fan,

Yuke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Haoru and He, Tairan and Wang, Zi and Ben, Qingwei and Xiao, Wenli and Luo, Zhengyi and Da, Xingye and Casta{\~n}eda, Fernando and Shi, Guanya and Sastry, Shankar and Fan, Linxi and Zhu, Yuke},
  title     = {Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6642--6652}
}
STAGE: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative: Peixuan Zhang,

Zijian Jia,

Kaiqi Liu,

Shuchen Weng,

Si Li,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peixuan and Jia, Zijian and Liu, Kaiqi and Weng, Shuchen and Li, Si and Shi, Boxin},
  title     = {{STAGE}: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {659--669}
}
Affordance-First Decomposition for Continual Learning in Video-Language Understanding: Mengzhu Xu,

Hanzhi Liu,

Ningkang Peng,

Qianyu Chen,

Canran Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{xu_2026_CVPR,
  author    = {Xu, Mengzhu and Liu, Hanzhi and Peng, Ningkang and Chen, Qianyu and Xiao, Canran},
  title     = {Affordance-First Decomposition for Continual Learning in Video-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3908--3919}
}
How Much 3D Do Video Foundation Models Encode?: Zixuan Huang,

Xiang Li,

Zhaoyang Lv,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zixuan and Li, Xiang and Lv, Zhaoyang and Rehg, James M.},
  title     = {How Much {3D} Do Video Foundation Models Encode?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {384--394}
}
AdaPrior: Bayesian-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning: S Divakar Bhat,

Amit Popat More,

Mudit Soni,

Bhuvan Aggarwal; [pdf] [supp]
[bibtex]
@InProceedings{Bhat_2026_CVPR,
  author    = {Bhat, S Divakar and More, Amit Popat and Soni, Mudit and Aggarwal, Bhuvan},
  title     = {{AdaPrior}: {Bayesian}-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10840--10850}
}
Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation: Baoteng Li,

Xianghao Zang,

Xinran Wang,

Xiangyu Na,

Zhixiang He,

Hao Sun,

Chi Zhang,

Zhongjiang He,

Tianwei Cao,

Kongming Liang,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Baoteng and Zang, Xianghao and Wang, Xinran and Na, Xiangyu and He, Zhixiang and Sun, Hao and Zhang, Chi and He, Zhongjiang and Cao, Tianwei and Liang, Kongming and Ma, Zhanyu},
  title     = {Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {561--571},
}
Towards Sparse Video Understanding and Reasoning: Chenwei Xu,

Zhen Ye,

Shang Wu,

Weijian Li,

Zihan Wang,

Zhuofan Xia,

Lie Lu,

Pranav Maneriker,

Fan Du,

Manling Li,

Han Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chenwei and Ye, Zhen and Wu, Shang and Li, Weijian and Wang, Zihan and Xia, Zhuofan and Lu, Lie and Maneriker, Pranav and Du, Fan and Li, Manling and Liu, Han},
  title     = {Towards Sparse Video Understanding and Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11357--11368},
}
TIM: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation: Yiheng Dong,

Yi Lin,

Shilong Huang,

Xiyan Yang,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Yiheng and Lin, Yi and Huang, Shilong and Yang, Xiyan and Yang, Xin},
  title     = {{TIM}: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6951--6961},
}
Prompt-Free Universal Region Proposal Network: Qihong Tang,

Changhan Liu,

Shaofeng Zhang,

Wenbin Li,

Qi Fan,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Qihong and Liu, Changhan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang},
  title     = {Prompt-Free Universal Region Proposal Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13080--13090},
}
Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks: Quanyu Zhang,

Zhongyi Han,

Hao Sun,

Yongshun Gong,

Xiaoyan Wang,

Yilong Yin,

Shuo Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Quanyu and Han, Zhongyi and Sun, Hao and Gong, Yongshun and Wang, Xiaoyan and Yin, Yilong and Li, Shuo},
  title     = {Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {789--800},
}
Fast Spatial Tracking with Visual Geometry Transformer: Chengjie Huang,

Guile Wu,

Dongfeng Bai,

Bingbing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Chengjie and Wu, Guile and Bai, Dongfeng and Liu, Bingbing},
  title     = {Fast Spatial Tracking with Visual Geometry Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {374--383},
}
Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval: Jun Li,

Xuhang Lou,

Jinpeng Wang,

Yuting Wang,

Yaowei Wang,

Shu-Tao Xia,

Bin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jun and Lou, Xuhang and Wang, Jinpeng and Wang, Yuting and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin},
  title     = {Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9710--9721},
}
VideoSeek: Long-Horizon Video Agent with Tool-Guided Seeking: Jingyang Lin,

Jialian Wu,

Jiang Liu,

Ximeng Sun,

Ze Wang,

Xiaodong Yu,

Jiebo Luo,

Zicheng Liu,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jingyang and Wu, Jialian and Liu, Jiang and Sun, Ximeng and Wang, Ze and Yu, Xiaodong and Luo, Jiebo and Liu, Zicheng and Barsoum, Emad},
  title     = {{VideoSeek}: Long-Horizon Video Agent with Tool-Guided Seeking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5465--5475},
}
EgoFlow: Gradient-Guided Flow Matching for Egocentric 6DoF Object Motion Generation: Abhishek Saroha,

Huajian Zeng,

Xingxing Zuo,

Daniel Cremers,

Xi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saroha_2026_CVPR,
  author    = {Saroha, Abhishek and Zeng, Huajian and Zuo, Xingxing and Cremers, Daniel and Wang, Xi},
  title     = {{EgoFlow}: Gradient-Guided Flow Matching for Egocentric {6DoF} Object Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4332--4342},
}
VA-p: Variational Policy Alignment for Pixel-Aware Autoregressive Generation: Xinyao Liao,

Qiyuan He,

Kai Xu,

Xiaoye Qu,

Yicong Li,

Wei Wei,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Xinyao and He, Qiyuan and Xu, Kai and Qu, Xiaoye and Li, Yicong and Wei, Wei and Yao, Angela},
  title     = {{VA-p}: Variational Policy Alignment for Pixel-Aware Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12787--12797},
}
GGBench: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models: Jingxuan Wei,

Caijun Jia,

Xi Bai,

Xinglong Xu,

Siyuan Li,

Linzhuang Sun,

Bihui Yu,

Conghui He,

Lijun Wu,

Cheng Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jingxuan and Jia, Caijun and Bai, Xi and Xu, Xinglong and Li, Siyuan and Sun, Linzhuang and Yu, Bihui and He, Conghui and Wu, Lijun and Tan, Cheng},
  title     = {{GGBench}: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5199--5210},
}
NeuroRule: Bridging Vision and Logic with Differentiable Rule Induction: Muhammad Zarar,

Mingzheng Zhang,

Xiaowang Zhang,

Zhiyong Feng; [pdf] [supp]
[bibtex]
@InProceedings{Zarar_2026_CVPR,
  author    = {Zarar, Muhammad and Zhang, Mingzheng and Zhang, Xiaowang and Feng, Zhiyong},
  title     = {{NeuroRule}: Bridging Vision and Logic with Differentiable Rule Induction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11654--11663},
}
Learning to Act Robustly with View-Invariant Latent Actions: Youngjoon Jeong,

Junha Chun,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Youngjoon and Chun, Junha and Kim, Taesup},
  title     = {Learning to Act Robustly with View-Invariant Latent Actions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6781--6790},
}
Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing: Wenxue Cui,

Hualin Li,

Yuhang Qin,

Yifu Xu,

Xiaopeng Fan,

Debin Zhao; [pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Wenxue and Li, Hualin and Qin, Yuhang and Xu, Yifu and Fan, Xiaopeng and Zhao, Debin},
  title     = {Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5283--5293},
}
Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors: Mingxuan Zhou,

Shuang Li,

Yutang Zhang,

Jing Geng,

Yirui Shen,

Jingxuan Kang,

Fuzhen Zhuang,

Shuigen Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Mingxuan and Li, Shuang and Zhang, Yutang and Geng, Jing and Shen, Yirui and Kang, Jingxuan and Zhuang, Fuzhen and Wang, Shuigen},
  title     = {Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2146--2155},
}
Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features: Zheng Gao,

Debin Meng,

Yunqi Miao,

Zhensong Zhang,

Songcen Xu,

Ioannis Patras,

Jifei Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Zheng and Meng, Debin and Miao, Yunqi and Zhang, Zhensong and Xu, Songcen and Patras, Ioannis and Song, Jifei},
  title     = {Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4656--4666},
}
S2C2Seg: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation: Yuhao Qing,

Yueying Wang,

Chaoyang Chen,

Weidong Zhang,

Jie Wen,

Xin Xu; [pdf]
[bibtex]
@InProceedings{Qing_2026_CVPR,
  author    = {Qing, Yuhao and Wang, Yueying and Chen, Chaoyang and Zhang, Weidong and Wen, Jie and Xu, Xin},
  title     = {{S2C2Seg}: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6293--6303},
}
Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control: Zhe Li,

Cheng Chi,

Yangyang Wei,

Boan Zhu,

Tao Huang,

Zhenguo Sun,

Yibo Peng,

Pengwei Wang,

Zhongyuan Wang,

Fangzhou Liu,

Chang Xu,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhe and Chi, Cheng and Wei, Yangyang and Zhu, Boan and Huang, Tao and Sun, Zhenguo and Peng, Yibo and Wang, Pengwei and Wang, Zhongyuan and Liu, Fangzhou and Xu, Chang and Zhang, Shanghang},
  title     = {Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {956--965},
}
3DReflecNet: A Large-Scale Dataset for 3D Reconstruction of Reflective, Transparent, and Low-Texture Objects: Zhicheng Liang,

Haoyi Yu,

Boyan Li,

Dayou Zhang,

Zijian Cao,

Tianyi Gong,

Junhua Liu,

Shuguang Cui,

Fangxin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhicheng and Yu, Haoyi and Li, Boyan and Zhang, Dayou and Cao, Zijian and Gong, Tianyi and Liu, Junhua and Cui, Shuguang and Wang, Fangxin},
  title     = {{3DReflecNet}: A Large-Scale Dataset for {3D} Reconstruction of Reflective, Transparent, and Low-Texture Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7244--7255},
}
Spherical Leech Quantization for Visual Tokenization and Generation: Yue Zhao,

Hanwen Jiang,

Zhenlin Xu,

Chutong Yang,

Ehsan Adeli,

Philipp Kraehenbuehl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yue and Jiang, Hanwen and Xu, Zhenlin and Yang, Chutong and Adeli, Ehsan and Kraehenbuehl, Philipp},
  title     = {Spherical {Leech} Quantization for Visual Tokenization and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12913--12923},
}
MetaSpectra+: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging: Yuxuan Liu,

Wei Xu,

Qi Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxuan and Xu, Wei and Guo, Qi},
  title     = {{MetaSpectra+}: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {209--219},
}
OASIS: On-Demand Hierarchical Event Memory for Streaming Video Reasoning: Zhijia Liang,

Jiaming Li,

Weikai Chen,

Yanhao Zhang,

Haonan Lu,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhijia and Li, Jiaming and Chen, Weikai and Zhang, Yanhao and Lu, Haonan and Li, Guanbin},
  title     = {{OASIS}: On-Demand Hierarchical Event Memory for Streaming Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2821--2831},
}
Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models: JangHyeon Lee,

Philipe Ambrozio Dias,

Yao-Yi Chiang,

Dalton Lunga; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, JangHyeon and Dias, Philipe Ambrozio and Chiang, Yao-Yi and Lunga, Dalton},
  title     = {Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1585--1595},
}
VAD-GS: Visibility-Aware Densification for 3D Gaussian Splatting in Dynamic Urban Scenes: Yikang Zhang,

Rui Fan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yikang and Fan, Rui},
  title     = {{VAD-GS}: Visibility-Aware Densification for {3D} {Gaussian} Splatting in Dynamic Urban Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4953--4962},
}
TANGO: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization: Chuang Liu,

Yichao Cao,

Xiu Su,

Haogang Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chuang and Cao, Yichao and Su, Xiu and Zhu, Haogang},
  title     = {{TANGO}: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8545--8555},
}
A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models: Mujtaba Hussain Mirza,

Antonio D'Orazio,

Odelia Melamed,

Iacopo Masi; [pdf] [supp]
[bibtex]
@InProceedings{Mirza_2026_CVPR,
  author    = {Mirza, Mujtaba Hussain and D'Orazio, Antonio and Melamed, Odelia and Masi, Iacopo},
  title     = {A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8598--8609},
}
TF-CADE: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection: Yearang Lee,

Ho-Joong Kim,

Seong-Whan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yearang and Kim, Ho-Joong and Lee, Seong-Whan},
  title     = {{TF-CADE}: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2843--2852},
}
Emergent Outlier View Rejection in Visual Geometry Grounded Transformers: Jisang Han,

Sunghwan Hong,

Jaewoo Jung,

Wooseok Jang,

Honggyu An,

Qianqian Wang,

Seungryong Kim,

Chen Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jisang and Hong, Sunghwan and Jung, Jaewoo and Jang, Wooseok and An, Honggyu and Wang, Qianqian and Kim, Seungryong and Feng, Chen},
  title     = {Emergent Outlier View Rejection in Visual Geometry Grounded Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {427--437},
}
TiViBench: Benchmarking Think-in-Video Reasoning for Video Generation: Harold Haodong Chen,

Disen Lan,

Wen-Jie Shu,

Qingyang Liu,

Zihan Wang,

Sirui Chen,

Wenkai Cheng,

Kanghao Chen,

Hongfei Zhang,

Zixin Zhang,

Rongjin Guo,

Yu Cheng,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Harold Haodong and Lan, Disen and Shu, Wen-Jie and Liu, Qingyang and Wang, Zihan and Chen, Sirui and Cheng, Wenkai and Chen, Kanghao and Zhang, Hongfei and Zhang, Zixin and Guo, Rongjin and Cheng, Yu and Chen, Ying-Cong},
  title     = {{TiViBench}: Benchmarking Think-in-Video Reasoning for Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11403--11413},
}
Artiverse: A Diverse and Physically Grounded Dataset for Articulated Objects: Denys Iliash,

Jiayi Liu,

Egor Fokin,

Qirui Wu,

Ali Mahdavi Amiri,

Manolis Savva,

Angel X. Chang; [pdf] [supp]
[bibtex]
@InProceedings{Iliash_2026_CVPR,
  author    = {Iliash, Denys and Liu, Jiayi and Fokin, Egor and Wu, Qirui and Amiri, Ali Mahdavi and Savva, Manolis and Chang, Angel X.},
  title     = {{Artiverse}: A Diverse and Physically Grounded Dataset for Articulated Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8932--8942},
}
Z-Order Transformer for Feed-Forward Gaussian Splatting: Can Wang,

Lei Liu,

Wei Jiang,

Dong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Can and Liu, Lei and Jiang, Wei and Xu, Dong},
  title     = {Z-Order Transformer for Feed-Forward {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7362--7371},
}
MOFA-VTON: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On: Xiaoyu Han,

Chenyang Wang,

Jing Wang,

Shunyuan Zheng,

Quanling Meng,

Shengping Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Xiaoyu and Wang, Chenyang and Wang, Jing and Zheng, Shunyuan and Meng, Quanling and Zhang, Shengping},
  title     = {{MOFA-VTON}: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1895--1905},
}
DepthFocus: Controllable Depth Estimation for See-Through Scenes: Junhong Min,

Jimin Kim,

Minwook Kim,

Cheol-Hui Min,

Youngpil Jeon,

Minyong Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Min_2026_CVPR,
  author    = {Min, Junhong and Kim, Jimin and Kim, Minwook and Min, Cheol-Hui and Jeon, Youngpil and Choi, Minyong},
  title     = {{DepthFocus}: Controllable Depth Estimation for See-Through Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12595--12605},
}
Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception: Yanpeng Sun,

Jing Hao,

Ke Zhu,

Jiang-Jiang Liu,

Xiaofan Li,

Na Zhao,

Zechao Li,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yanpeng and Hao, Jing and Zhu, Ke and Liu, Jiang-Jiang and Li, Xiaofan and Zhao, Na and Li, Zechao and Wang, Jingdong},
  title     = {Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1683--1694},
}
NESTOR: A Nested MOE-based Neural Operator for Large-Scale PDE Pre-Training: Dengdi Sun,

Xiaoya Zhou,

Xiao Wang,

Hao Si,

Wanli Lyu,

Jin Tang,

Bin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Dengdi and Zhou, Xiaoya and Wang, Xiao and Si, Hao and Lyu, Wanli and Tang, Jin and Luo, Bin},
  title     = {{NESTOR}: A Nested {MOE}-based Neural Operator for Large-Scale {PDE} Pre-Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6147--6156},
}
Think-as-You-See: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models: Jialiang Zhang,

Junlong Tong,

Junyan Lin,

Hao Wu,

Yirong Sun,

Yunpu Ma,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jialiang and Tong, Junlong and Lin, Junyan and Wu, Hao and Sun, Yirong and Ma, Yunpu and Shen, Xiaoyu},
  title     = {{Think-as-You-See}: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11998--12008},
}
From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking: Yuqing Shao,

Yuchen Yang,

Rui Yu,

Weilong Li,

Xu Guo,

Huaicheng Yan,

Wei Wang,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Yuqing and Yang, Yuchen and Yu, Rui and Li, Weilong and Guo, Xu and Yan, Huaicheng and Wang, Wei and Sun, Xiao},
  title     = {From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6878--6888},
}
Fast-FoundationStereo: Real-Time Zero-Shot Stereo Matching: Bowen Wen,

Shaurya Dewan,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Bowen and Dewan, Shaurya and Birchfield, Stan},
  title     = {{Fast-FoundationStereo}: Real-Time Zero-Shot Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7513--7524},
}
Omni-Attribute: Open-vocabulary Attribute Encoder for Visual Concept Personalization: Tsai-Shien Chen,

Aliaksandr Siarohin,

Gordon Guocheng Qian,

Kuan-Chieh Jackson Wang,

Egor Nemchinov,

Moayed Haji-Ali,

Riza Alp Guler,

Willi Menapace,

Ivan Skorokhodov,

Anil Kag,

Jun-Yan Zhu,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Qian, Gordon Guocheng and Wang, Kuan-Chieh Jackson and Nemchinov, Egor and Haji-Ali, Moayed and Guler, Riza Alp and Menapace, Willi and Skorokhodov, Ivan and Kag, Anil and Zhu, Jun-Yan and Tulyakov, Sergey},
  title     = {{Omni-Attribute}: Open-vocabulary Attribute Encoder for Visual Concept Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8194--8204},
}
LangField4D: Learning Identity-Adaptive and Spatio-Temporal Continuous 4D Language Fields for Dynamic Scenes: Yichao Xu,

Qiaowei Miao,

Jinsheng Quan,

Wei Yang,

Zhihui Li,

Yawei Luo; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yichao and Miao, Qiaowei and Quan, Jinsheng and Yang, Wei and Li, Zhihui and Luo, Yawei},
  title     = {{LangField4D}: Learning Identity-Adaptive and Spatio-Temporal Continuous {4D} Language Fields for Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9558--9569},
}
Representing 3D Faces with Learnable B-Spline Volumes: Prashanth Chandran,

Daoye Wang,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chandran_2026_CVPR,
  author    = {Chandran, Prashanth and Wang, Daoye and Bolkart, Timo},
  title     = {Representing {3D} Faces with Learnable {B-Spline} Volumes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13824--13834},
}
ForceVLA2: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation: Yang Li,

Zhaxizhuoma Zhaxizhuoma,

Hongru Jiang,

Junjie Xia,

Hongquan Zhang,

Jinda Du,

Yunsong Zhou,

Jia Zeng,

Ce Hao,

Jieji Ren,

Qiaojun Yu,

Cewu Lu,

Yu Qiao,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Zhaxizhuoma, Zhaxizhuoma and Jiang, Hongru and Xia, Junjie and Zhang, Hongquan and Du, Jinda and Zhou, Yunsong and Zeng, Jia and Hao, Ce and Ren, Jieji and Yu, Qiaojun and Lu, Cewu and Qiao, Yu and Pang, Jiangmiao},
  title     = {{ForceVLA2}: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8911--8920},
}
TGSFormer: Scalable Temporal Gaussian Splatting for Embodied Semantic Scene Completion: Rui Qian,

Haozhi Cao,

Tianchen Deng,

Tianxin Hu,

Weixiang Guo,

Shenghai Yuan,

Lihua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Rui and Cao, Haozhi and Deng, Tianchen and Hu, Tianxin and Guo, Weixiang and Yuan, Shenghai and Xie, Lihua},
  title     = {{TGSFormer}: Scalable Temporal {Gaussian} Splatting for Embodied Semantic Scene Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11881--11890},
}
VISTA: A Test-Time Self-Improving Video Generation Agent: Do Xuan Long,

Xingchen Wan,

Hootan Nakhost,

Chen-Yu Lee,

Tomas Pfister,

Sercan Ö. Arik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Do Xuan and Wan, Xingchen and Nakhost, Hootan and Lee, Chen-Yu and Pfister, Tomas and Arik, Sercan {\"O}.},
  title     = {{VISTA}: A Test-Time Self-Improving Video Generation Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6021--6032},
}
Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons: Lingyun Dai,

Zehao Chen,

Yan Liu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Lingyun and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8342--8352},
}
MaskFocus: Focusing Policy Optimization on Critical Steps for Masked Image Generation: Guohui Zhang,

Hu Yu,

Xiaoxiao Ma,

Yaning Pan,

Hang Xu,

Jie Huang,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohui and Yu, Hu and Ma, Xiaoxiao and Pan, Yaning and Xu, Hang and Huang, Jie and Zhao, Feng},
  title     = {{MaskFocus}: Focusing Policy Optimization on Critical Steps for Masked Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5956--5966},
}
MapRoute: Precise-Concept Erasing Mappers via Semantic Routing: Sihao Li,

Baixi Liang,

Shuohong Xia,

Yunyun Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Sihao and Liang, Baixi and Xia, Shuohong and Yang, Yunyun},
  title     = {{MapRoute}: Precise-Concept Erasing Mappers via Semantic Routing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10187--10196},
}
Not All Birds Look The Same: Identity-Preserving Generation For Birds: Aaron Sun,

Oindrila Saha,

Subhransu Maji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Aaron and Saha, Oindrila and Maji, Subhransu},
  title     = {Not All Birds Look The Same: Identity-Preserving Generation For Birds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1983--1993},
}
Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation: Zihao Zhang,

Aming Wu,

Yang Li,

Yahong Han,

Jialie Shen; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zihao and Wu, Aming and Li, Yang and Han, Yahong and Shen, Jialie},
  title     = {Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10006--10015},
}
Discriminative Perception via Anchored Description for Reasoning Segmentation: Tao Yang,

Qing Zhou,

Yanliang Li,

Qi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Tao and Zhou, Qing and Li, Yanliang and Wang, Qi},
  title     = {Discriminative Perception via Anchored Description for Reasoning Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13189--13198},
}
Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions: Sriram Narayanan,

Mani Ramanagopal,

Srinivasa Narasimhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Narayanan_2026_CVPR,
  author    = {Narayanan, Sriram and Ramanagopal, Mani and Narasimhan, Srinivasa},
  title     = {Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {199--208},
}
DRAMA: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux: Xinkui Zhao,

Yifan Zhang,

Sai Liu,

Naibo Wang,

Guanjie Cheng,

Yueshen Xu,

Chang Liu,

Shuiguang Deng,

Jianwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xinkui and Zhang, Yifan and Liu, Sai and Wang, Naibo and Cheng, Guanjie and Xu, Yueshen and Liu, Chang and Deng, Shuiguang and Yin, Jianwei},
  title     = {{DRAMA}: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1020--1030},
}
Tavatar: Topology-Aware Gaussian Attribute Derivation for Animatable Human Avatars: Hailin Luo,

Yifan Yang,

Jiazhi Shu,

Zixiong Huang,

Qi Chen,

Qing Du,

Mingkui Tan; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Hailin and Yang, Yifan and Shu, Jiazhi and Huang, Zixiong and Chen, Qi and Du, Qing and Tan, Mingkui},
  title     = {{Tavatar}: Topology-Aware {Gaussian} Attribute Derivation for Animatable Human Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4087--4096},
}
From Weights to Concepts: Data-Free Interpretability of CLIP via Singular Vector Decomposition: Francesco Gentile,

Nicola Dall'Asen,

Francesco Tonini,

Massimiliano Mancini,

Lorenzo Vaquero,

Elisa Ricci; [pdf] [supp]
[bibtex]
@InProceedings{Gentile_2026_CVPR,
  author    = {Gentile, Francesco and Dall'Asen, Nicola and Tonini, Francesco and Mancini, Massimiliano and Vaquero, Lorenzo and Ricci, Elisa},
  title     = {From Weights to Concepts: Data-Free Interpretability of {CLIP} via Singular Vector Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2895--2906},
}
Beyond Rule-Based Agents: Active Markov Games for Realistic Multi-Agent Interaction in Autonomous Driving: Yuan Gui,

Hongchen Luo,

Jiao Wang,

Liqi Qu; [pdf] [supp]
[bibtex]
@InProceedings{Gui_2026_CVPR,
  author    = {Gui, Yuan and Luo, Hongchen and Wang, Jiao and Qu, Liqi},
  title     = {Beyond Rule-Based Agents: Active {Markov} Games for Realistic Multi-Agent Interaction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10689--10698},
}
Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via Bayesian Quadrature: Mohammad Mahdi Kazemi Esfeh,

Qi Yan,

Yongxing Zhang,

Zahra Gholami,

Renjie Liao,

Purang Abolmaesumi; [pdf] [supp]
[bibtex]
@InProceedings{Esfeh_2026_CVPR,
  author    = {Esfeh, Mohammad Mahdi Kazemi and Yan, Qi and Zhang, Yongxing and Gholami, Zahra and Liao, Renjie and Abolmaesumi, Purang},
  title     = {Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via {Bayesian} Quadrature},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12977--12986},
}
SketchVL: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More: Muye Huang,

Lingling Zhang,

Yifei Li,

Yaqiang Wu,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Muye and Zhang, Lingling and Li, Yifei and Wu, Yaqiang and Liu, Jun},
  title     = {{SketchVL}: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4738--4748},
}
P-Flow: Prompting Visual Effects Generation: Rui Zhao,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Rui and Shou, Mike Zheng},
  title     = {{P-Flow}: Prompting Visual Effects Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9149--9160},
}
One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against MLLMs Hallucination: Zhan Fa,

Yue Duan,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fa_2026_CVPR,
  author    = {Fa, Zhan and Duan, Yue and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against {MLLMs} Hallucination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11106--11115},
}
GRPO-Guard: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping: Jing Wang,

Jiajun Liang,

Jie Liu,

Henglin Liu,

Gongye Liu,

Jun Zheng,

Wanyuan Pang,

Ao Ma,

Zhenyu Xie,

Xintao Wang,

Meng Wang,

Pengfei Wan,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jing and Liang, Jiajun and Liu, Jie and Liu, Henglin and Liu, Gongye and Zheng, Jun and Pang, Wanyuan and Ma, Ao and Xie, Zhenyu and Wang, Xintao and Wang, Meng and Wan, Pengfei and Liang, Xiaodan},
  title     = {{GRPO-Guard}: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5988--5998},
}
Efficient Weighted Sampling via Score-based Generative Models: Heasung Kim,

Taekyun Lee,

Hyeji Kim,

Gustavo De Veciana; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Heasung and Lee, Taekyun and Kim, Hyeji and De Veciana, Gustavo},
  title     = {Efficient Weighted Sampling via Score-based Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1155--1166},
}
FastLightGen: Fast and Light Video Generation with Fewer Steps and Parameters: Shitong Shao,

Yufei Gu,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Shitong and Gu, Yufei and Xie, Zeke},
  title     = {{FastLightGen}: Fast and Light Video Generation with Fewer Steps and Parameters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2104--2114},
}
Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection: Jialei Zhan,

Li Liu,

Jiehua Zhang,

Yuhang Xie,

Yongxiang Liu,

Jiangming Chen,

Ming-Ming Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jialei and Liu, Li and Zhang, Jiehua and Xie, Yuhang and Liu, Yongxiang and Chen, Jiangming and Cheng, Ming-Ming},
  title     = {Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13264--13274},
}
Gamba: Mamba-based graph convolutional network with dynamic graph topology learning for action recognition: Rouyi Zhou,

Yangzhi Wu,

Jiajun Wen,

Can Gao,

Feng Liu,

Zhihui Lai,

Linlin Shen; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Rouyi and Wu, Yangzhi and Wen, Jiajun and Gao, Can and Liu, Feng and Lai, Zhihui and Shen, Linlin},
  title     = {Gamba: {Mamba}-based graph convolutional network with dynamic graph topology learning for action recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6065--6074},
}
HandX: Scaling Bimanual Motion and Interaction Generation: Zimu Zhang,

Yucheng Zhang,

Xiyan Xu,

Ziyin Wang,

Sirui Xu,

Kai Zhou,

Bing Zhou,

Chuan Guo,

Jian Wang,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zimu and Zhang, Yucheng and Xu, Xiyan and Wang, Ziyin and Xu, Sirui and Zhou, Kai and Zhou, Bing and Guo, Chuan and Wang, Jian and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{HandX}: Scaling Bimanual Motion and Interaction Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2274--2284},
}
Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition: Zhijing Sun,

Senyan Xu,

Ruixuan Jiang,

Kean Liu,

Runze Tian,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhijing and Xu, Senyan and Jiang, Ruixuan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8045--8055},
}
From Manuals to Actions: A Unified VLA Model for Chain-of-Thought Manual Generation and Robotic Manipulation: Chenyang Gu,

Jiaming Liu,

Hao Chen,

Runzhong Huang,

Qingpo Wuwu,

Xiaoqi Li,

Zhuoyang Liu,

Ying Li,

Renrui Zhang,

Peng Jia,

Pheng-Ann Heng,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Chenyang and Liu, Jiaming and Chen, Hao and Huang, Runzhong and Wuwu, Qingpo and Li, Xiaoqi and Liu, Zhuoyang and Li, Ying and Zhang, Renrui and Jia, Peng and Heng, Pheng-Ann and Zhang, Shanghang},
  title     = {From Manuals to Actions: A Unified {VLA} Model for Chain-of-Thought Manual Generation and Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13540--13552},
}
RARE: Learn to RAnk and REtrieve for Monocular 3D Object Detection: Hyeonjeong Park,

Peixi Xiong,

Xiaoqian Ruan,

Dian Jia,

Pei Yu,

Wei Tang; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyeonjeong and Xiong, Peixi and Ruan, Xiaoqian and Jia, Dian and Yu, Pei and Tang, Wei},
  title     = {{RARE}: Learn to {RAnk} and {REtrieve} for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11556--11566},
}
Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation: Zirui Pan,

Xin Wang,

Yipeng Zhang,

Hong Chen,

Kecheng Zheng,

Wenwu Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zirui and Wang, Xin and Zhang, Yipeng and Chen, Hong and Zheng, Kecheng and Zhu, Wenwu},
  title     = {Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {636--646},
}
Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation: Shuang Li,

Chao Deng,

Hang Chen,

Liqun Liu,

Zhenyu Hu,

Te Cao,

Mengge Xue,

Yuan Chen,

Peng Shu,

Huan Yu,

Jie Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuang and Deng, Chao and Chen, Hang and Liu, Liqun and Hu, Zhenyu and Cao, Te and Xue, Mengge and Chen, Yuan and Shu, Peng and Yu, Huan and Jiang, Jie},
  title     = {Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7741--7751},
}
ReWeaver: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction: Ming Li,

Hui Shan,

Kai Zheng,

Chentao Shen,

Siyu Liu,

Yanwei Fu,

Zhen Chen,

Xiangru Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ming and Shan, Hui and Zheng, Kai and Shen, Chentao and Liu, Siyu and Fu, Yanwei and Chen, Zhen and Huang, Xiangru},
  title     = {{ReWeaver}: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4122--4131},
}
PROMPTMINER: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and VLM-Guided Optimization: Mingzhe Li,

Renhao Zhang,

Zhiyang Wen,

Siqi Pan,

Bruno Castro da Silva,

Juan Zhai,

Shiqing Ma; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingzhe and Zhang, Renhao and Wen, Zhiyang and Pan, Siqi and da Silva, Bruno Castro and Zhai, Juan and Ma, Shiqing},
  title     = {{PROMPTMINER}: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and {VLM}-Guided Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7795--7804},
}
S2D: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations: Arnav Chavan,

Nahush Lele,

Udbhav Bamba,

Sankalp Dayal,

Aditi Raghunathan,

Deepak Gupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chavan_2026_CVPR,
  author    = {Chavan, Arnav and Lele, Nahush and Bamba, Udbhav and Dayal, Sankalp and Raghunathan, Aditi and Gupta, Deepak},
  title     = {{S2D}: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12074--12083},
}
CaliTex: Geometry-Calibrated Attention for View-Coherent 3D Texture Generation: Chenyu Liu,

Hongze Chen,

Jingzhi Bao,

Lingting Zhu,

Runze Zhang,

Weikai Chen,

Zeyu Hu,

Yingda Yin,

Keyang Luo,

Xin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chenyu and Chen, Hongze and Bao, Jingzhi and Zhu, Lingting and Zhang, Runze and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Luo, Keyang and Wang, Xin},
  title     = {{CaliTex}: Geometry-Calibrated Attention for View-Coherent {3D} Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5923--5933},
}
DynamicTree: Interactive Real Tree Animation via Sparse Voxel Spectrum: Yaokun Li,

Lihe Ding,

Xiao Chen,

Guang Tan,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yaokun and Ding, Lihe and Chen, Xiao and Tan, Guang and Xue, Tianfan},
  title     = {{DynamicTree}: Interactive Real Tree Animation via Sparse Voxel Spectrum},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1241--1251},
}
Yo'City: Personalized and Boundless 3D Realistic City Scene Generation via Self-Critic Expansion: Keyang Lu,

Sifan Zhou,

Hongbin Xu,

Gang Xu,

Zhifei Yang,

Yikai Wang,

Zhen Xiao,

Jieyi Long,

Ming Li; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Keyang and Zhou, Sifan and Xu, Hongbin and Xu, Gang and Yang, Zhifei and Wang, Yikai and Xiao, Zhen and Long, Jieyi and Li, Ming},
  title     = {{Yo'City}: Personalized and Boundless {3D} Realistic City Scene Generation via Self-Critic Expansion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3219--3230},
}
LVLM-Aided Alignment of Task-Specific Vision Models: Alexander Koebler,

Lukas Kuhn,

Ingo Thon,

Florian Buettner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koebler_2026_CVPR,
  author    = {Koebler, Alexander and Kuhn, Lukas and Thon, Ingo and Buettner, Florian},
  title     = {{LVLM}-Aided Alignment of Task-Specific Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7837--7846},
}
Unified Latent Space for Understanding and Generation via Semantic Auto-encoder: Xiaojie Li,

Yang Zhao,

Ming Li,

Yancheng Zhang,

Zonglin Lyu,

Yunpeng Chen,

Rui Wang,

Daquan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaojie and Zhao, Yang and Li, Ming and Zhang, Yancheng and Lyu, Zonglin and Chen, Yunpeng and Wang, Rui and Zhou, Daquan},
  title     = {Unified Latent Space for Understanding and Generation via Semantic Auto-encoder},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2115--2124},
}
Talking Together: Synthesizing Co-Located 3D Conversations from Audio: Mengyi Shan,

Shouchieh Chang,

Ziqian Bai,

Shichen Liu,

Yinda Zhang,

Luchuan Song,

Rohit Pandey,

Sean Fanello,

Zeng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Mengyi and Chang, Shouchieh and Bai, Ziqian and Liu, Shichen and Zhang, Yinda and Song, Luchuan and Pandey, Rohit and Fanello, Sean and Huang, Zeng},
  title     = {Talking Together: Synthesizing Co-Located {3D} Conversations from Audio},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3965--3977},
}
PHANTOM: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics: Ying Shen,

Jerry Xiong,

Tianjiao Yu,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Ying and Xiong, Jerry and Yu, Tianjiao and Lourentzou, Ismini},
  title     = {{PHANTOM}: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11185--11194},
}
Learning Multi-View Spatial Reasoning from Cross-View Relations: Suchae Jeong,

Jaehwi Song,

Haeone Lee,

Hanna Kim,

Jian Kim,

Dongjun Lee,

Dong Kyu Shin,

Changyeon Kim,

Dongyoon Hahm,

Woogyeol Jin,

Juheon Choi,

Kimin Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Suchae and Song, Jaehwi and Lee, Haeone and Kim, Hanna and Kim, Jian and Lee, Dongjun and Shin, Dong Kyu and Kim, Changyeon and Hahm, Dongyoon and Jin, Woogyeol and Choi, Juheon and Lee, Kimin},
  title     = {Learning Multi-View Spatial Reasoning from Cross-View Relations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2570--2581},
}
4D Primitive-Mache: Glueing Primitives for Persistent 4D Scene Reconstruction: Kirill Mazur,

Marwan Taher,

Andrew J. Davison; [pdf] [supp]
[bibtex]
@InProceedings{Mazur_2026_CVPR,
  author    = {Mazur, Kirill and Taher, Marwan and Davison, Andrew J.},
  title     = {{4D} {Primitive-Mache}: Glueing Primitives for Persistent {4D} Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7372--7381},
}
HandDreamer: Zero-Shot Text to 3D Hand Model Generation using Corrective Hand Shape Guidance: Green Rosh,

Prateek Kukreja,

Vishakha SR,

Pawan Prasad B H; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosh_2026_CVPR,
  author    = {Rosh, Green and Kukreja, Prateek and SR, Vishakha and {B H}, Pawan Prasad},
  title     = {{HandDreamer}: Zero-Shot Text to {3D} Hand Model Generation using Corrective Hand Shape Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8846--8856},
}
Edit-aware RAW reconstruction: Abhijith Punnappurath,

Luxi Zhao,

Ke Zhao,

Hue Nguyen,

Radek Grzeszczuk,

Michael S. Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Punnappurath_2026_CVPR,
  author    = {Punnappurath, Abhijith and Zhao, Luxi and Zhao, Ke and Nguyen, Hue and Grzeszczuk, Radek and Brown, Michael S.},
  title     = {Edit-aware {RAW} reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8418--8427},
}
View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification: Quan Zhang,

Zeqiang Cai,

Peiming Zhao,

Jingze Wu,

Cailun Wu,

Hongbo Chen,

Jianhuang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Quan and Cai, Zeqiang and Zhao, Peiming and Wu, Jingze and Wu, Cailun and Chen, Hongbo and Lai, Jianhuang},
  title     = {View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4383--4392},
}
EmoStyle: Emotion-Driven Image Stylization: Jingyuan Yang,

Zihuan Bai,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jingyuan and Bai, Zihuan and Huang, Hui},
  title     = {{EmoStyle}: Emotion-Driven Image Stylization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {594--603},
}
Prospective Dynamic 3D MRI Reconstruction via Latent-Space Motion Tracking from Single Measurement: Lixuan Chen,

Zhongnan Liu,

Jesse Hamilton,

James M. Balter,

Jeong Joon Park,

Liyue Shen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Lixuan and Liu, Zhongnan and Hamilton, Jesse and Balter, James M. and Park, Jeong Joon and Shen, Liyue},
  title     = {Prospective Dynamic {3D} {MRI} Reconstruction via Latent-Space Motion Tracking from Single Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5627--5636},
}
STARFlow-V: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows: Jiatao Gu,

Ying Shen,

Tianrong Chen,

Laurent Dinh,

Yuyang Wang,

Miguel Angel Bautista,

David Berthelot,

Josh Susskind,

Shuangfei Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Jiatao and Shen, Ying and Chen, Tianrong and Dinh, Laurent and Wang, Yuyang and Bautista, Miguel Angel and Berthelot, David and Susskind, Josh and Zhai, Shuangfei},
  title     = {{STARFlow-V}: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9084--9094},
}
PixARMesh: Autoregressive Mesh-Native Single-View Scene Reconstruction: Xiang Zhang,

Sohyun Yoo,

Hongrui Wu,

Chuan Li,

Jianwen Xie,

Zhuowen Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiang and Yoo, Sohyun and Wu, Hongrui and Li, Chuan and Xie, Jianwen and Tu, Zhuowen},
  title     = {{PixARMesh}: Autoregressive Mesh-Native Single-View Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5881--5891},
}
Bayesian Decomposition and Semantic Completion for Few-shot Semantic Segmentation: Guangchen Shi,

Yirui Wu,

Wei Zhu,

Tao Wang,

Hao Zhang,

Bo Li,

Tong Lu; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Guangchen and Wu, Yirui and Zhu, Wei and Wang, Tao and Zhang, Hao and Li, Bo and Lu, Tong},
  title     = {{Bayesian} Decomposition and Semantic Completion for Few-shot Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12354--12363},
}
Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos: Dingkun Wei,

Zehong Shen,

Yan Xia,

Georgios Pavlakos,

Yujun Shen,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Dingkun and Shen, Zehong and Xia, Yan and Pavlakos, Georgios and Shen, Yujun and Zhou, Xiaowei},
  title     = {Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7187--7196},
}
Ego: Embedding-Guided Personalization of Vision-Language Models: Soroush Seifi,

Simon Gardier,

Vaggelis Dorovatas,

Daniel Olmeda Reino,

Rahaf Aljundi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seifi_2026_CVPR,
  author    = {Seifi, Soroush and Gardier, Simon and Dorovatas, Vaggelis and Reino, Daniel Olmeda and Aljundi, Rahaf},
  title     = {Ego: Embedding-Guided Personalization of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11674--11683},
}
HyperST: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction: Chen Zhang,

Yilu An,

Ying Chen,

Hao Li,

Xitong Ling,

Lihao Liu,

Junjun He,

Yuxiang Lin,

Zihui Wang,

Rongshan Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chen and An, Yilu and Chen, Ying and Li, Hao and Ling, Xitong and Liu, Lihao and He, Junjun and Lin, Yuxiang and Wang, Zihui and Yu, Rongshan},
  title     = {{HyperST}: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5730--5739},
}
STAC: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming 3D Reconstruction: Runze Wang,

Yuxuan Song,

Youcheng Cai,

Ligang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Runze and Song, Yuxuan and Cai, Youcheng and Liu, Ligang},
  title     = {{STAC}: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7567--7576},
}
Context-Nav: Context-Driven Exploration and Viewpoint-Aware 3D Spatial Reasoning for Instance Navigation: Won Shik Jang,

Ue-Hwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Won Shik and Kim, Ue-Hwan},
  title     = {{Context-Nav}: Context-Driven Exploration and Viewpoint-Aware {3D} Spatial Reasoning for Instance Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9626--9636},
}
QVGGT: Post-Training Quantized Visual Geometry Grounded Transformer: Zhizhen Pan,

Hesong Wang,

Huan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zhizhen and Wang, Hesong and Wang, Huan},
  title     = {{QVGGT}: Post-Training Quantized Visual Geometry Grounded Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7536--7545},
}
R-4B: Incentivizing General-Purpose Auto-Thinking in MLLMs via Bi-Mode Annealing and Reinforce Learning: Qi Yang,

Bolin Ni,

Shiming Xiang,

Houwen Peng; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qi and Ni, Bolin and Xiang, Shiming and Peng, Houwen},
  title     = {{R-4B}: Incentivizing General-Purpose Auto-Thinking in {MLLMs} via Bi-Mode Annealing and Reinforce Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7891--7900},
}
Seeing through boxes: Non-Line-of-Sight 3D Reconstruction from Radar Signals: Jiachen Lu,

Hailan Shanbhag,

Haitham Al Hassanieh; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jiachen and Shanbhag, Hailan and Al Hassanieh, Haitham},
  title     = {Seeing through boxes: Non-Line-of-Sight {3D} Reconstruction from Radar Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1221--1230},
}
Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning: Changlin Li,

Jiawei Zhang,

Shuhao Liu,

Sihao Lin,

Zeyi Shi,

Zhihui Li,

Xiaojun Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Changlin and Zhang, Jiawei and Liu, Shuhao and Lin, Sihao and Shi, Zeyi and Li, Zhihui and Chang, Xiaojun},
  title     = {Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5967--5977},
}
UniRain: Unified Image Deraining with RAG-based Dataset Distillation and Multi-objective Reweighted Optimization: Qianfeng Yang,

Qiyuan Guan,

Xiang Chen,

Jiyu Jin,

Guiyue Jin,

Jiangxin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qianfeng and Guan, Qiyuan and Chen, Xiang and Jin, Jiyu and Jin, Guiyue and Dong, Jiangxin},
  title     = {{UniRain}: Unified Image Deraining with {RAG}-based Dataset Distillation and Multi-objective Reweighted Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12428--12437},
}
FlowDirector: Training-Free Flow Steering for Precise Text-to-Video Editing: Guangzhao Li,

Yanming Yang,

Chenxi Song,

Xiaohong Liu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangzhao and Yang, Yanming and Song, Chenxi and Liu, Xiaohong and Zhang, Chi},
  title     = {{FlowDirector}: Training-Free Flow Steering for Precise Text-to-Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7805--7815},
}
ImageRAGTurbo: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models: Peijie Qiu,

Hariharan Ramshankar,

Arnau Ramisa,

Amit Kumar K C,

René Vidal,

Vamsi Salaka,

Rahul Bhagat; [pdf] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Peijie and Ramshankar, Hariharan and Ramisa, Arnau and {K C}, Amit Kumar and Vidal, Ren{\'e} and Salaka, Vamsi and Bhagat, Rahul},
  title     = {{ImageRAGTurbo}: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {529--539},
}
World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models: Eunsu Kim,

Junyeong Park,

Na Min An,

Junseong Kim,

Hitesh Laxmichand Patel,

Jiho Jin,

Julia Kruk,

Amit Agarwal,

Srikant Panda,

Fenal Ashokbhai Ilasariya,

Hyunjung Shim,

Alice Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Eunsu and Park, Junyeong and An, Na Min and Kim, Junseong and Patel, Hitesh Laxmichand and Jin, Jiho and Kruk, Julia and Agarwal, Amit and Panda, Srikant and Ilasariya, Fenal Ashokbhai and Shim, Hyunjung and Oh, Alice},
  title     = {World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2477--2489},
}
NuWa: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices: Ziteng Wei,

Qiang He,

Bing Li,

Feifei Chen,

Hai Jin,

Yun Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Ziteng and He, Qiang and Li, Bing and Chen, Feifei and Jin, Hai and Yang, Yun},
  title     = {{NuWa}: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {156--166},
}
SCAPO: Self-Supervised Category-Level Articulated Pose Estimation from a Single 3D Observation: Can Zhang,

Gim Hee Lee; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Can and Lee, Gim Hee},
  title     = {{SCAPO}: Self-Supervised Category-Level Articulated Pose Estimation from a Single {3D} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13802--13811},
}
Frequency-domain Manipulation for Face Obfuscation: Jintae Kim,

Keunsoo Ko,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jintae and Ko, Keunsoo and Kim, Chang-Su},
  title     = {Frequency-domain Manipulation for Face Obfuscation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10241--10250},
}
E3AD: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving: Yihong Tang,

Haicheng Liao,

Tong Nie,

Junlin He,

Ao Qu,

Kehua Chen,

Wei Ma,

Zhenning Li,

Lijun Sun,

Chengzhong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yihong and Liao, Haicheng and Nie, Tong and He, Junlin and Qu, Ao and Chen, Kehua and Ma, Wei and Li, Zhenning and Sun, Lijun and Xu, Chengzhong},
  title     = {{E3AD}: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10610--10620},
}; Back