CVPR 2026 Open Access Repository

Papers

Back
Generalizable Structure-Aware Keypoint Correspondence for Category-Unified 3D Single Object Tracking: Jie Xiao,

Yinchao Ma,

Yuyang Tang,

Dengqing Yang,

Jianpeng Yang,

Xu Zhou,

Qiao Li,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Jie and Ma, Yinchao and Tang, Yuyang and Yang, Dengqing and Yang, Jianpeng and Zhou, Xu and Li, Qiao and Yang, Wenfei and Zhang, Tianzhu},
  title     = {Generalizable Structure-Aware Keypoint Correspondence for Category-Unified {3D} Single Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28156--28166},
}
Unique Lives, Shared World: Learning from Single-Life Videos: Tengda Han,

Sayna Ebrahimi,

Dilara Gokay,

Li Yang Ku,

Maks Ovsjanikov,

Iva Babukova,

Daniel Zoran,

Viorica Patraucean,

Joao Carreira,

Andrew Zisserman,

Dima Damen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Tengda and Ebrahimi, Sayna and Gokay, Dilara and Ku, Li Yang and Ovsjanikov, Maks and Babukova, Iva and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Zisserman, Andrew and Damen, Dima},
  title     = {Unique Lives, Shared World: Learning from Single-Life Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24021--24030},
}
EcoSplat: Efficiency-controllable Feed-forward 3D Gaussian Splatting from Multi-view Images: Minh-Quan Viet Bui,

Jongmin Park,

Juan Luis Gonzalez,

Jaeho Moon,

Jihyong Oh,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bui_2026_CVPR,
  author    = {Bui, Minh-Quan Viet and Park, Jongmin and Gonzalez, Juan Luis and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl},
  title     = {{EcoSplat}: Efficiency-controllable Feed-forward {3D} {Gaussian} Splatting from Multi-view Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26010--26020},
}
UniComp: Rethinking Video Compression Through Informational Uniqueness: Chao Yuan,

Shimin Chen,

Minliang Lin,

Limeng Qiao,

Guanglu Wan,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Chao and Chen, Shimin and Lin, Minliang and Qiao, Limeng and Wan, Guanglu and Ma, Lin},
  title     = {{UniComp}: Rethinking Video Compression Through Informational Uniqueness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18609--18618},
}
TALON: Test-time Adaptive Learning for On-the-Fly Category Discovery: Yanan Wu,

Yuhan Yan,

Tailai Chen,

Zhixiang Chi,

ZiZhang Wu,

Yi Jin,

Yang Wang,

Zhenbo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yanan and Yan, Yuhan and Chen, Tailai and Chi, Zhixiang and Wu, ZiZhang and Jin, Yi and Wang, Yang and Li, Zhenbo},
  title     = {{TALON}: Test-time Adaptive Learning for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22259--22269},
}
SenseSearch: Empowering Vision-Language Models with High-Resolution Agentic Search-Reasoning via Reinforcement Learning: Yong Xien Chng,

Tao Hu,

Wenwen Tong,

Xueheng Li,

Jiandong Chen,

Haojia Yu,

Jiefan Lu,

Hewei Guo,

Hanming Deng,

Chengjun Xie,

Gao Huang,

Lewei Lu; [pdf] [supp]
[bibtex]
@InProceedings{Chng_2026_CVPR,
  author    = {Chng, Yong Xien and Hu, Tao and Tong, Wenwen and Li, Xueheng and Chen, Jiandong and Yu, Haojia and Lu, Jiefan and Guo, Hewei and Deng, Hanming and Xie, Chengjun and Huang, Gao and Lu, Lewei},
  title     = {{SenseSearch}: Empowering Vision-Language Models with High-Resolution Agentic Search-Reasoning via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26508--26517},
}
Quant Experts: Token-aware Adaptive Error Reconstruction with Mixture of Experts for Large Vision-Language Models Quantization: Chenwei Jia,

Baoting Li,

Xuchong Zhang,

Mingzhuo Wei,

Bochen Lin,

Hongbin Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Chenwei and Li, Baoting and Zhang, Xuchong and Wei, Mingzhuo and Lin, Bochen and Sun, Hongbin},
  title     = {{Quant Experts}: Token-aware Adaptive Error Reconstruction with Mixture of Experts for Large Vision-Language Models Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24716--24726},
}
Dr.Occ: Depth- and Region-Guided 3D Occupancy from Surround-View Cameras for Autonomous Driving: Xubo Zhu,

Haoyang Zhang,

Fei He,

Rui Wu,

Yanhu Shan,

Wen Yang,

Huai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xubo and Zhang, Haoyang and He, Fei and Wu, Rui and Shan, Yanhu and Yang, Wen and Yu, Huai},
  title     = {{Dr.Occ}: Depth- and Region-Guided {3D} Occupancy from Surround-View Cameras for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28610--28619},
}
AdaSpot: Spend Resolution Where It Matters for Precise Event Spotting: Artur Xarles,

Sergio Escalera,

Thomas B. Moeslund,

Albert Clapés; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xarles_2026_CVPR,
  author    = {Xarles, Artur and Escalera, Sergio and Moeslund, Thomas B. and Clap{\'e}s, Albert},
  title     = {{AdaSpot}: Spend Resolution Where It Matters for Precise Event Spotting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24010--24020},
}
Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification: Wen Wen,

Hao Chen,

Shiliang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Wen and Chen, Hao and Zhang, Shiliang},
  title     = {Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18503--18512},
}
From Spots to Pixels: Dense Spatial Gene Expression Prediction from Histology Images: Ruikun Zhang,

Yan Yang,

Liyuan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruikun and Yang, Yan and Pan, Liyuan},
  title     = {From Spots to Pixels: Dense Spatial Gene Expression Prediction from Histology Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19791--19800},
}
Rethinking Pose Refinement in 3D Gaussian Splatting under Pose Prior and Geometric Uncertainty: Mangyu Kong,

Jaewon Lee,

Seongwon Lee,

Euntai Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Mangyu and Lee, Jaewon and Lee, Seongwon and Kim, Euntai},
  title     = {Rethinking Pose Refinement in {3D} {Gaussian} Splatting under Pose Prior and Geometric Uncertainty},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25958--25968},
}
Pluggable Pruning with Contiguous Layer Distillation for Diffusion Transformers: Jian Ma,

Qirong Peng,

Xujie Zhu,

Peixing Xie,

Chen Chen,

Haonan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Jian and Peng, Qirong and Zhu, Xujie and Xie, Peixing and Chen, Chen and Lu, Haonan},
  title     = {Pluggable Pruning with Contiguous Layer Distillation for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18640--18650},
}
PersonaVLM: Long-Term Personalized Multimodal LLMs: Chang Nie,

Chaoyou Fu,

Yifan Zhang,

Haihua Yang,

Caifeng Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Chang and Fu, Chaoyou and Zhang, Yifan and Yang, Haihua and Shan, Caifeng},
  title     = {{PersonaVLM}: Long-Term Personalized Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15000--15009},
}
Hierarchical Enhancement of Semantic Priors for Disentangled Text-Driven Motion Generation: Wenhan Lv,

Shaopan Wang,

Xiangyu Wu,

Tianchu Hang,

Zhongquan Jian,

Qingqiang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Wenhan and Wang, Shaopan and Wu, Xiangyu and Hang, Tianchu and Jian, Zhongquan and Wu, Qingqiang},
  title     = {Hierarchical Enhancement of Semantic Priors for Disentangled Text-Driven Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14844--14853},
}
Scalable Object Relation Encoding for Better 3D Spatial Reasoning in Large Language Models: Shengli Zhou,

Minghang Zheng,

Feng Zheng,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Shengli and Zheng, Minghang and Zheng, Feng and Liu, Yang},
  title     = {Scalable Object Relation Encoding for Better {3D} Spatial Reasoning in Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16758--16767},
}
Multimodal Protein Language Models for Enzyme Kinetic Parameters: From Substrate Recognition to Conformational Adaptation: Fei Wang,

Xinye Zheng,

Kun Li,

Yanyan Wei,

Yuxin Liu,

Ganpeng Hu,

Tong Bao,

Jingwen Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Fei and Zheng, Xinye and Li, Kun and Wei, Yanyan and Liu, Yuxin and Hu, Ganpeng and Bao, Tong and Yang, Jingwen},
  title     = {Multimodal Protein Language Models for Enzyme Kinetic Parameters: From Substrate Recognition to Conformational Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15829--15839},
}
FloVerse: Floor Plan-Guided Multi-Modal Navigation: Weiqi Huang,

Shuangyi Dong,

Jiaxin Li,

Yifei Guo,

Zan Wang,

Wei Liang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Weiqi and Dong, Shuangyi and Li, Jiaxin and Guo, Yifei and Wang, Zan and Liang, Wei},
  title     = {{FloVerse}: Floor Plan-Guided Multi-Modal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15156--15165},
}
ConceptPose: Training-Free Zero-Shot Object Pose Estimation using Concept Vectors: Liming Kuang,

Yordanka Velikova,

Mahdi Saleh,

Jan-Nico Zaech,

Danda Pani Paudel,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Liming and Velikova, Yordanka and Saleh, Mahdi and Zaech, Jan-Nico and Paudel, Danda Pani and Busam, Benjamin},
  title     = {{ConceptPose}: Training-Free Zero-Shot Object Pose Estimation using Concept Vectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26582--26592},
}
AdaBet: Gradient-free Layer Selection for Efficient Training of Deep Neural Networks: Irene Tenison,

Soumyajit Chatterjee,

Fahim Kawsar,

Mohammad Malekzadeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tenison_2026_CVPR,
  author    = {Tenison, Irene and Chatterjee, Soumyajit and Kawsar, Fahim and Malekzadeh, Mohammad},
  title     = {{AdaBet}: Gradient-free Layer Selection for Efficient Training of Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20233--20242},
}
Dual Ascent Diffusion for Inverse Problems: Minseo Kim,

Axel Levy,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minseo and Levy, Axel and Wetzstein, Gordon},
  title     = {Dual Ascent Diffusion for Inverse Problems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23622--23631},
}
Grounded 3D-Aware Spatial Vision-Language Modeling: An-Chieh Cheng,

Yang Fu,

Yatai Ji,

Ligeng Zhu,

Guanqi Zhan,

Zhuoyang Zhang,

Zhaojing Yang,

Song Han,

Yao Lu,

Pavlo Molchanov,

Vidya Nariyambut Murali,

Jan Kautz,

Xiaolong Wang,

Hongxu Yin,

Sifei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, An-Chieh and Fu, Yang and Ji, Yatai and Zhu, Ligeng and Zhan, Guanqi and Zhang, Zhuoyang and Yang, Zhaojing and Han, Song and Lu, Yao and Molchanov, Pavlo and Murali, Vidya Nariyambut and Kautz, Jan and Wang, Xiaolong and Yin, Hongxu and Liu, Sifei},
  title     = {Grounded {3D}-Aware Spatial Vision-Language Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16688--16700},
}
Dr. Seg: Revisiting GRPO Training for Visual Large Language Models through Perception-Oriented Design: Haoxiang Sun,

Tao Wang,

Chenwei Tang,

Li Yuan,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoxiang and Wang, Tao and Tang, Chenwei and Yuan, Li and Lv, Jiancheng},
  title     = {{Dr. Seg}: Revisiting {GRPO} Training for Visual Large Language Models through Perception-Oriented Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24320--24329},
}
Towards Robust Multi-Modal Semantic Segmentation with Teacher-Student Framework and Hybrid Prototype Distillation: Jiaqi Tan,

Xu Zheng,

Yang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Jiaqi and Zheng, Xu and Liu, Yang},
  title     = {Towards Robust Multi-Modal Semantic Segmentation with Teacher-Student Framework and Hybrid Prototype Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27665--27675},
}
EchoFoley: Event-Centric Hierarchical Control for Video Grounded Creative Sound Generation: Bingxuan Li,

Yiming Cui,

Yicheng He,

Yiwei Wang,

Shu Zhang,

Longyin Wen,

Yulei Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingxuan and Cui, Yiming and He, Yicheng and Wang, Yiwei and Zhang, Shu and Wen, Longyin and Niu, Yulei},
  title     = {{EchoFoley}: Event-Centric Hierarchical Control for Video Grounded Creative Sound Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27229--27238},
}
AffordGrasp: Cross-Modal Diffusion for Affordance-Aware Grasp Synthesis: Xiaofei Wu,

Yi Zhang,

Yumeng Liu,

Yuexin Ma,

Yujiao Shi,

Xuming He; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Wu_2026_CVPR is also used by the TALON entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaofei and Zhang, Yi and Liu, Yumeng and Ma, Yuexin and Shi, Yujiao and He, Xuming},
  title     = {{AffordGrasp}: Cross-Modal Diffusion for Affordance-Aware Grasp Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15944--15953},
}
RAYNOVA: Scale-Temporal Autoregressive World Modeling in Ray Space: Yichen Xie,

Chensheng Peng,

Mazen Abdelfattah,

Yihan Hu,

Jiezhi Yang,

Eric Higgins,

Ryan Brigden,

Masayoshi Tomizuka,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yichen and Peng, Chensheng and Abdelfattah, Mazen and Hu, Yihan and Yang, Jiezhi and Higgins, Eric and Brigden, Ryan and Tomizuka, Masayoshi and Zhan, Wei},
  title     = {{RAYNOVA}: Scale-Temporal Autoregressive World Modeling in Ray Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25426--25437},
}
LATTICE: Democratize High-Fidelity 3D Generation at Scale: Zeqiang Lai,

Yunfei Zhao,

Zibo Zhao,

Haolin Liu,

Qingxiang Lin,

Jingwei Huang,

Chunchao Guo,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Liu, Haolin and Lin, Qingxiang and Huang, Jingwei and Guo, Chunchao and Yue, Xiangyu},
  title     = {{LATTICE}: Democratize High-Fidelity {3D} Generation at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19982--19992},
}
A Frame is Worth One Token: Efficient Generative World Modeling with Delta Tokens: Tommie Kerssies,

Gabriele Berton,

Ju He,

Qihang Yu,

Wufei Ma,

Daan de Geus,

Gijs Dubbelman,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kerssies_2026_CVPR,
  author    = {Kerssies, Tommie and Berton, Gabriele and He, Ju and Yu, Qihang and Ma, Wufei and de Geus, Daan and Dubbelman, Gijs and Chen, Liang-Chieh},
  title     = {A Frame is Worth One Token: Efficient Generative World Modeling with Delta Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27978--27988},
}
Harmony: Harmonizing Audio and Video Generation through Cross-Task Synergy: Teng Hu,

Zhentao Yu,

Guozhen Zhang,

Zihan Su,

Zhengguang Zhou,

Youliang Zhang,

Yuan Zhou,

Qinglin Lu,

Ran Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Teng and Yu, Zhentao and Zhang, Guozhen and Su, Zihan and Zhou, Zhengguang and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Yi, Ran},
  title     = {Harmony: Harmonizing Audio and Video Generation through Cross-Task Synergy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16085--16095},
}
Dual Graph Regularized Deep Unfolding Network for Guided Depth Map Super-resolution: Zhiwei Zhong,

Peilin Chen,

Qiangqiang Shen,

Bo Li,

Shiqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Zhiwei and Chen, Peilin and Shen, Qiangqiang and Li, Bo and Wang, Shiqi},
  title     = {Dual Graph Regularized Deep Unfolding Network for Guided Depth Map Super-resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16322--16332},
}
Towards Robust Vision Transformers: Path Dependency Analysis and a Simple Two-Stage Adversarial Training: Seongmin Kim,

Byung Cheol Song; [pdf] [supp]
[bibtex]
@comment{NOTE(review): the citation key Kim_2026_CVPR is also used by the "Dual Ascent Diffusion for Inverse Problems" entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seongmin and Song, Byung Cheol},
  title     = {Towards Robust Vision Transformers: Path Dependency Analysis and a Simple Two-Stage Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15669--15678},
}
PDD: Manifold-Prior Diverse Distillation for Medical Anomaly Detection: Xijun Lu,

Hongying Liu,

Fanhua Shang,

Yanming Hui,

Liang Wan; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xijun and Liu, Hongying and Shang, Fanhua and Hui, Yanming and Wan, Liang},
  title     = {{PDD}: Manifold-Prior Diverse Distillation for Medical Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28534--28544},
}
Illustrator's Depth: Monocular Layer Index Prediction for Image Decomposition: Nissim Maruani,

Peiying Zhang,

Siddhartha Chaudhuri,

Matthew Fisher,

Nanxuan Zhao,

Vladimir G. Kim,

Pierre Alliez,

Mathieu Desbrun,

Wang Yifan; [pdf] [supp]
[bibtex]
@comment{NOTE(review): the page listing prints the last author as "Wang Yifan" while the entry stores "Yifan, Wang"; confirm which part is the family name before relying on this split.}
@InProceedings{Maruani_2026_CVPR,
  author    = {Maruani, Nissim and Zhang, Peiying and Chaudhuri, Siddhartha and Fisher, Matthew and Zhao, Nanxuan and Kim, Vladimir G. and Alliez, Pierre and Desbrun, Mathieu and Yifan, Wang},
  title     = {Illustrator's Depth: Monocular Layer Index Prediction for Image Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26824--26834},
}
On the Role of Temporal Granularity in the Robustness of Spiking Neural Networks: Mengting Xu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mengting and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {On the Role of Temporal Granularity in the Robustness of Spiking Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27946--27955},
}
MoCoDiff: A Controllable Autoregressive Diffusion Model for Expressive Motion Generation: Wenfeng Song,

Xuehan Wang,

Shuai Li,

Yi Chen,

Yuting Guo,

Zhenyu Wu,

Xingliang Jin,

Chenglizhao Chen,

Fei Hou,

Hongyu Wu,

Aimin Hao; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Wenfeng and Wang, Xuehan and Li, Shuai and Chen, Yi and Guo, Yuting and Wu, Zhenyu and Jin, Xingliang and Chen, Chenglizhao and Hou, Fei and Wu, Hongyu and Hao, Aimin},
  title     = {{MoCoDiff}: A Controllable Autoregressive Diffusion Model for Expressive Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23528--23537},
}
Boosting Quantitive and Spatial Awareness for Zero-Shot Object Counting: Da Zhang,

Bingyu Li,

Feiyu Wang,

Zhiyuan Zhao,

Junyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Zhang_2026_CVPR is also used by the "From Spots to Pixels" entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Da and Li, Bingyu and Wang, Feiyu and Zhao, Zhiyuan and Gao, Junyu},
  title     = {Boosting Quantitive and Spatial Awareness for Zero-Shot Object Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20356--20366},
}
OpenVoxel: Training-Free Grouping and Captioning Voxels for Open-Vocabulary 3D Scene Understanding: Sheng-Yu Huang,

Jaesung Choe,

Yu-Chiang Frank Wang,

Cheng Sun; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Huang_2026_CVPR is also used by the FloVerse entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Sheng-Yu and Choe, Jaesung and Wang, Yu-Chiang Frank and Sun, Cheng},
  title     = {{OpenVoxel}: Training-Free Grouping and Captioning Voxels for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16734--16745},
}
RAGTrack: Language-aware RGBT Tracking with Retrieval-Augmented Generation: Hao Li,

Yuhao Wang,

Wenning Hao,

Pingping Zhang,

Dong Wang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Li_2026_CVPR is also used by the EchoFoley entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hao and Wang, Yuhao and Hao, Wenning and Zhang, Pingping and Wang, Dong and Lu, Huchuan},
  title     = {{RAGTrack}: Language-aware {RGBT} Tracking with Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28179--28189},
}
Federated Active Learning Under Extreme Non-IID and Global Class Imbalance: Chen-Chen Zong,

Sheng-Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2026_CVPR,
  author    = {Zong, Chen-Chen and Huang, Sheng-Jun},
  title     = {Federated Active Learning Under Extreme Non-{IID} and Global Class Imbalance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24534--24544},
}
Streaming Diffusion Model for Fast Infrared and Visible Video Fusion: Jinyuan Liu,

Ludan Sun,

Tengyu Ma,

Chunyan Yang,

Zhiying Jiang,

Long Ma,

Risheng Liu,

Xin Fan; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinyuan and Sun, Ludan and Ma, Tengyu and Yang, Chunyan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {Streaming Diffusion Model for Fast Infrared and Visible Video Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14305--14314},
}
Tunable Soft Equivariance with Guarantees: Md Ashiqur Rahman,

Lim Jun Hao,

Jeremiah Jiang,

Teck-Yian Lim,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the page listing prints the second author as "Lim Jun Hao" while the entry stores "Hao, Lim Jun"; confirm which part is the family name before relying on this split.}
@InProceedings{Rahman_2026_CVPR,
  author    = {Rahman, Md Ashiqur and Hao, Lim Jun and Jiang, Jeremiah and Lim, Teck-Yian and Yeh, Raymond A.},
  title     = {Tunable Soft Equivariance with Guarantees},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17693--17703},
}
ReAttnCLIP: Training-Free Open-Vocabulary Remote Sensing Image Segmentation via Re-defined Attention in CLIP: Xin Niu,

Manqi Zhao,

Dongsheng Jiang,

Yingying Wu,

Bing Su; [pdf] [supp]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Xin and Zhao, Manqi and Jiang, Dongsheng and Wu, Yingying and Su, Bing},
  title     = {{ReAttnCLIP}: Training-Free Open-Vocabulary Remote Sensing Image Segmentation via Re-defined Attention in {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24980--24989},
}
UniVerse: Empower Unified Generation with Reasoning and Knowledge: Kaiyue Sun,

Weiyang Jin,

Chengqi Duan,

Rongyao Fang,

Xian Liu,

Yuwei Niu,

Chunwei Wang,

Aoxue Li,

Xihui Liu; [pdf] [supp]
[bibtex]
@comment{NOTE(review): the citation key Sun_2026_CVPR is also used by the "Dr. Seg" entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Kaiyue and Jin, Weiyang and Duan, Chengqi and Fang, Rongyao and Liu, Xian and Niu, Yuwei and Wang, Chunwei and Li, Aoxue and Liu, Xihui},
  title     = {{UniVerse}: Empower Unified Generation with Reasoning and Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21997--22006},
}
EcoAlign: An Economically Rational Framework for Efficient LVLM Alignment: Ruoxi Cheng,

Hao-Xuan Ma,

Teng Ma,

Hongyi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Cheng_2026_CVPR is also used by the "Grounded 3D-Aware Spatial Vision-Language Modeling" entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Ruoxi and Ma, Hao-Xuan and Ma, Teng and Zhang, Hongyi},
  title     = {{EcoAlign}: An Economically Rational Framework for Efficient {LVLM} Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17451--17461},
}
Building Robust Vision Encoders for Cross-Dataset Evaluation in Immunofluorescent Microscopy: Umar Marikkar,

Syed Sameed Husain,

Muhammad Awais,

Sara Atito; [pdf] [supp]
[bibtex]
@InProceedings{Marikkar_2026_CVPR,
  author    = {Marikkar, Umar and Husain, Syed Sameed and Awais, Muhammad and Atito, Sara},
  title     = {Building Robust Vision Encoders for Cross-Dataset Evaluation in Immunofluorescent Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28308--28317},
}
Gated KalmaNet: A Fading Memory Layer through Test-time Ridge Regression: Liangzu Peng,

Aditya Chattopadhyay,

Luca Zancato,

Elvis Nunez,

Wei Xia,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Liangzu and Chattopadhyay, Aditya and Zancato, Luca and Nunez, Elvis and Xia, Wei and Soatto, Stefano},
  title     = {Gated {KalmaNet}: A Fading Memory Layer through Test-time Ridge Regression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20212--20222},
}
Rethinking Token Reduction for Large Vision-Language Models: Yi Wang,

Haofei Zhang,

Qihan Huang,

Anda Cao,

Gongfan Fang,

Wei Wang,

Xuan Jin,

Jie Song,

Mingli Song,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Wang_2026_CVPR is also used by the "Multimodal Protein Language Models for Enzyme Kinetic Parameters" entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yi and Zhang, Haofei and Huang, Qihan and Cao, Anda and Fang, Gongfan and Wang, Wei and Jin, Xuan and Song, Jie and Song, Mingli and Wang, Xinchao},
  title     = {Rethinking Token Reduction for Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24727--24737},
}
CaptionQA: Is Your Caption as Useful as the Image Itself?: Shijia Yang,

Yunong Liu,

Bohan Zhai,

Ximeng Sun,

Zicheng Liu,

Emad Barsoum,

Manling Li,

Chenfeng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Shijia and Liu, Yunong and Zhai, Bohan and Sun, Ximeng and Liu, Zicheng and Barsoum, Emad and Li, Manling and Xu, Chenfeng},
  title     = {{CaptionQA}: Is Your Caption as Useful as the Image Itself?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23741--23750},
}
FlowDC: Flow-Based Decoupling-Decay for Complex Image Editing: Yilei Jiang,

Zhen Wang,

Yanghao Wang,

Jun Yu,

Yueting Zhuang,

Jun Xiao,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yilei and Wang, Zhen and Wang, Yanghao and Yu, Jun and Zhuang, Yueting and Xiao, Jun and Chen, Long},
  title     = {{FlowDC}: Flow-Based Decoupling-Decay for Complex Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25757--25766},
}
PDCR: Perception-Decomposed Confidence Reward for Vision-Language Reasoning: Hee Suk Yoon,

Eunseop Yoon,

Ji Woo Hong,

SooHwan Eom,

Gwanhyeong Koo,

Mark Hasegawa-Johnson,

Qi Dai,

Chong Luo,

Chang D. Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Hee Suk and Yoon, Eunseop and Hong, Ji Woo and Eom, SooHwan and Koo, Gwanhyeong and Hasegawa-Johnson, Mark and Dai, Qi and Luo, Chong and Yoo, Chang D.},
  title     = {{PDCR}: Perception-Decomposed Confidence Reward for Vision-Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18881--18891},
}
MeanFlow Transformers with Representation Autoencoders: Zheyuan Hu,

Chieh-Hsin Lai,

Ge Wu,

Yuki Mitsufuji,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Hu_2026_CVPR is also used by the Harmony entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zheyuan and Lai, Chieh-Hsin and Wu, Ge and Mitsufuji, Yuki and Ermon, Stefano},
  title     = {{MeanFlow} Transformers with Representation Autoencoders},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25709--25718},
}
WeMMU: Enhanced Bridging of Vision-Language Models and Diffusion Models via Noisy Query Tokens: Jian Yang,

Dacheng Yin,

Xiaoxuan He,

Yong Li,

Fengyun Rao,

Jing Lyu,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@comment{NOTE(review): the citation key Yang_2026_CVPR is also used by the CaptionQA entry earlier in this listing; disambiguate one of them before merging both into a single .bib file.}
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jian and Yin, Dacheng and He, Xiaoxuan and Li, Yong and Rao, Fengyun and Lyu, Jing and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {{WeMMU}: Enhanced Bridging of Vision-Language Models and Diffusion Models via Noisy Query Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17609--17618},
}
PAM: A Pose-Appearance-Motion Engine for Sim-to-Real HOI Video Generation: Mingju Gao,

Kaisen Yang,

Huan-ang Gao,

Bohan Li,

Ao Ding,

Wenyi Li,

Yangcheng Yu,

Jinkun Liu,

Shaocong Xu,

Yike Niu,

Haohan Chi,

Hao Chen,

Hao Tang,

Yu Zhang,

Li Yi,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Mingju and Yang, Kaisen and Gao, Huan-ang and Li, Bohan and Ding, Ao and Li, Wenyi and Yu, Yangcheng and Liu, Jinkun and Xu, Shaocong and Niu, Yike and Chi, Haohan and Chen, Hao and Tang, Hao and Zhang, Yu and Yi, Li and Zhao, Hao},
  title     = {{PAM}: A Pose-Appearance-Motion Engine for Sim-to-Real {HOI} Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15954--15965},
}
Learning from Semantic Dictionaries: Discriminative Codebook Contrastive Learning for Unified Visual Representation and Generation: Imanol G. Estepa,

Jesús M. Rodríguez-de-Vera,

Bhalaji Nagarajan,

Petia Radeva; [pdf] [supp]
[bibtex]
@InProceedings{Estepa_2026_CVPR,
  author    = {Estepa, Imanol G. and Rodr{\'\i}guez-de-Vera, Jes{\'u}s M. and Nagarajan, Bhalaji and Radeva, Petia},
  title     = {Learning from Semantic Dictionaries: Discriminative Codebook Contrastive Learning for Unified Visual Representation and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22456--22466},
}
ZipMap: Linear-Time Stateful 3D Reconstruction via Test-Time Training: Haian Jin,

Rundi Wu,

Tianyuan Zhang,

Ruiqi Gao,

Jonathan T. Barron,

Noah Snavely,

Aleksander Hołyński; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Haian and Wu, Rundi and Zhang, Tianyuan and Gao, Ruiqi and Barron, Jonathan T. and Snavely, Noah and Ho{\l}y{\'n}ski, Aleksander},
  title     = {{ZipMap}: Linear-Time Stateful {3D} Reconstruction via Test-Time Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21748--21759},
}
Retrieve and Segment: Are a Few Examples Enough to Bridge the Supervision Gap in Open-Vocabulary Segmentation?: Tilemachos Aravanis,

Vladan Stojnić,

Bill Psomas,

Nikos Komodakis,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Aravanis_2026_CVPR,
  author    = {Aravanis, Tilemachos and Stojni{\'c}, Vladan and Psomas, Bill and Komodakis, Nikos and Tolias, Giorgos},
  title     = {Retrieve and Segment: Are a Few Examples Enough to Bridge the Supervision Gap in Open-Vocabulary Segmentation?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27621--27632},
}
NaTex: Seamless Texture Generation as Latent Color Diffusion: Zeqiang Lai,

Yunfei Zhao,

Zibo Zhao,

Xin Yang,

Xin Huang,

Jingwei Huang,

Xiangyu Yue,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Yang, Xin and Huang, Xin and Huang, Jingwei and Yue, Xiangyu and Guo, Chunchao},
  title     = {{NaTex}: Seamless Texture Generation as Latent Color Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18619--18629},
}
HoneyBee: Data Recipes for Vision-Language Reasoners: Hritik Bansal,

Devendra Singh Sachan,

Kai-Wei Chang,

Aditya Grover,

Gargi Ghosh,

Wen-tau Yih,

Ramakanth Pasunuru; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bansal_2026_CVPR,
  author    = {Bansal, Hritik and Sachan, Devendra Singh and Chang, Kai-Wei and Grover, Aditya and Ghosh, Gargi and Yih, Wen-tau and Pasunuru, Ramakanth},
  title     = {{HoneyBee}: Data Recipes for Vision-Language Reasoners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26262--26273},
}
Condensed Test-Time Adaptation of VLMs for Action Recognition: Wenxuan Ge,

Hongyu Qu,

Rui Yan,

Guo-Sen Xie,

Yazhou Yao,

Xiangbo Shu,

Jinhui Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Wenxuan and Qu, Hongyu and Yan, Rui and Xie, Guo-Sen and Yao, Yazhou and Shu, Xiangbo and Tang, Jinhui},
  title     = {Condensed Test-Time Adaptation of {VLMs} for Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16977--16987},
}
RI-Mamba: Rotation-Invariant Mamba for Robust Text-to-Shape Retrieval: Khanh Nguyen,

Dasith de Silva Edirimuni,

Ghulam Mubashar Hassan,

Ajmal Mian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Khanh and de Silva Edirimuni, Dasith and Hassan, Ghulam Mubashar and Mian, Ajmal},
  title     = {{RI-Mamba}: Rotation-Invariant {Mamba} for Robust Text-to-Shape Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16834--16844},
}
PointThinker: Point-Incentivized Parallel Thinking for Multimodal Large Language Model: Zhengdong Hu,

Chao Wang,

Fengyun Rao,

Jing LYU,

Hehe Fan,

Yi Yang; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zhengdong and Wang, Chao and Rao, Fengyun and Lyu, Jing and Fan, Hehe and Yang, Yi},
  title     = {{PointThinker}: Point-Incentivized Parallel Thinking for Multimodal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26240--26250},
}
Balanced Hierarchical Contrastive Learning with Decoupled Queries for Fine-grained Object Detection in Remote Sensing Images: Jingzhou Chen,

Dexin Chen,

Fengchao Xiong,

Yuntao Qian,

Liang Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingzhou and Chen, Dexin and Xiong, Fengchao and Qian, Yuntao and Xiao, Liang},
  title     = {Balanced Hierarchical Contrastive Learning with Decoupled Queries for Fine-grained Object Detection in Remote Sensing Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20619--20628},
}
Stand-In: A Lightweight and Plug-and-Play Identity Control for Video Generation: Bowen Xue,

Zheng-Peng Duan,

Qixin Yan,

Wenjing Wang,

Hao Liu,

Chun-Le Guo,

Chongyi Li,

Chen Li,

Jing Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Bowen and Duan, Zheng-Peng and Yan, Qixin and Wang, Wenjing and Liu, Hao and Guo, Chun-Le and Li, Chongyi and Li, Chen and Lyu, Jing},
  title     = {{Stand-In}: A Lightweight and Plug-and-Play Identity Control for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23314--23324},
}
VecGlypher: Unified Vector Glyph Generation with Language Models: Xiaoke Huang,

Bhavul Gauri,

Kam Woh Ng,

Tony Ng,

Mengmeng Xu,

Zhiheng Liu,

Weiming Ren,

Zhaochong An,

Zijian Zhou,

Haonan Qiu,

Yuyin Zhou,

Sen He,

Ziheng Wang,

Tao Xiang,

Xiao Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xiaoke and Gauri, Bhavul and Ng, Kam Woh and Ng, Tony and Xu, Mengmeng and Liu, Zhiheng and Ren, Weiming and An, Zhaochong and Zhou, Zijian and Qiu, Haonan and Zhou, Yuyin and He, Sen and Wang, Ziheng and Xiang, Tao and Han, Xiao},
  title     = {{VecGlypher}: Unified Vector Glyph Generation with Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24373--24383},
}
PhyOceanCast: Global Ocean Forecasting with Physics-Informed Diffusion: Qixiu Li,

Xiang Zhu,

Xiaoyong Li,

Xiaolong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qixiu and Zhu, Xiang and Li, Xiaoyong and Xu, Xiaolong},
  title     = {{PhyOceanCast}: Global Ocean Forecasting with Physics-Informed Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23652--23662},
}
GraPHFormer: A Multimodal Graph Persistent Homology Transformer for the Analysis of Neuroscience Morphologies: Uzair Shah,

Marco Agus,

Mahmoud Gamal,

Mahmood Alzubaidi,

Corrado Cali,

Pierre J. Magistretti,

Abdesselam Bouzerdoum,

Mowafa Househ; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shah_2026_CVPR,
  author    = {Shah, Uzair and Agus, Marco and Gamal, Mahmoud and Alzubaidi, Mahmood and Cali, Corrado and Magistretti, Pierre J. and Bouzerdoum, Abdesselam and Househ, Mowafa},
  title     = {{GraPHFormer}: A Multimodal Graph Persistent Homology Transformer for the Analysis of Neuroscience Morphologies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28224--28233},
}
Locate-Then-Examine: Grounded Region Reasoning Improves Detection of AI-Generated Images: Yikun Ji,

Yan Hong,

Bowen Deng,

Jun Lan,

Huijia Zhu,

Weiqiang Wang,

Liqing Zhang,

Jianfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Yikun and Hong, Yan and Deng, Bowen and Lan, Jun and Zhu, Huijia and Wang, Weiqiang and Zhang, Liqing and Zhang, Jianfu},
  title     = {{Locate-Then-Examine}: Grounded Region Reasoning Improves Detection of {AI}-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19165--19175},
}
Dehallu3D: Hallucination-Mitigated 3D Generation from a Single Image via Cyclic View Consistency Refinement: Xiwen Wang,

Shichao Zhang,

Ruowei Wang,

Mao Li,

Chenyu Zhou,

Ji-Zhe Zhou,

Qijun Zhao,

Hailun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiwen and Zhang, Shichao and Wang, Ruowei and Li, Mao and Zhou, Chenyu and Zhou, Ji-Zhe and Zhao, Qijun and Zhang, Hailun},
  title     = {{Dehallu3D}: Hallucination-Mitigated {3D} Generation from a Single Image via Cyclic View Consistency Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19993--20002},
}
TextPecker: Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering: Hanshen Zhu,

Yuliang Liu,

Xuecheng Wu,

An-Lan Wang,

Hao Feng,

Dingkang Yang,

Chao Feng,

Can Huang,

Jingqun Tang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Hanshen and Liu, Yuliang and Wu, Xuecheng and Wang, An-Lan and Feng, Hao and Yang, Dingkang and Feng, Chao and Huang, Can and Tang, Jingqun and Bai, Xiang},
  title     = {{TextPecker}: Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22059--22069},
}
Semantic Audio-Visual Navigation in Continuous Environments: Yichen Zeng,

Hebaixu Wang,

Meng Liu,

Yu Zhou,

Chen Gao,

Kehan Chen,

Gongping Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Yichen and Wang, Hebaixu and Liu, Meng and Zhou, Yu and Gao, Chen and Chen, Kehan and Huang, Gongping},
  title     = {Semantic Audio-Visual Navigation in Continuous Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22369--22379},
}
Training High-Level Schedulers with Execution-Feedback Reinforcement Learning for Long-Horizon GUI Automation: Zehao Deng,

Tianjie Ju,

Zheng Wu,

Zhuosheng Zhang,

Gongshen Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Zehao and Ju, Tianjie and Wu, Zheng and Zhang, Zhuosheng and Liu, Gongshen},
  title     = {Training High-Level Schedulers with Execution-Feedback Reinforcement Learning for Long-Horizon {GUI} Automation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27525--27535},
}
TGTrack: Temporal Generative Learning for Unified Single Object Tracking: Wanting Geng,

Xin Chen,

Chuanyu Sun,

Jie Zhao,

Ben Kang,

Dong Wang,

Huchuan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Wanting and Chen, Xin and Sun, Chuanyu and Zhao, Jie and Kang, Ben and Wang, Dong and Lu, Huchuan},
  title     = {{TGTrack}: Temporal Generative Learning for Unified Single Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28134--28144},
}
OrienPose: Orientation-Guided Novel View Synthesis for Single-Image Unseen Object Pose Estimation: Yating Liu,

Zhaoshuai Qi,

Yang Zou,

Yongnan Yang,

Shizhou Zhang,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yating and Qi, Zhaoshuai and Zou, Yang and Yang, Yongnan and Zhang, Shizhou and Zhang, Yanning},
  title     = {{OrienPose}: Orientation-Guided Novel View Synthesis for Single-Image Unseen Object Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26813--26823},
}
Towards Motion Turing Test: Evaluating Human-Likeness in Humanoid Robots: Mingzhe Li,

Mengyin Liu,

Zekai Wu,

Xincheng Lin,

Junsheng Zhang,

Ming Yan,

Zengye Xie,

Changwang Zhang,

Chenglu Wen,

Lan Xu,

Siqi Shen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingzhe and Liu, Mengyin and Wu, Zekai and Lin, Xincheng and Zhang, Junsheng and Yan, Ming and Xie, Zengye and Zhang, Changwang and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng},
  title     = {Towards Motion {Turing} Test: Evaluating Human-Likeness in Humanoid Robots},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16486--16498},
}
VULCAN: Tool-Augmented Multi Agents for Iterative 3D Object Arrangement: Zhengfei Kuang,

Rui Lin,

Long Zhao,

Gordon Wetzstein,

Saining Xie,

Sanghyun Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Zhengfei and Lin, Rui and Zhao, Long and Wetzstein, Gordon and Xie, Saining and Woo, Sanghyun},
  title     = {{VULCAN}: Tool-Augmented Multi Agents for Iterative {3D} Object Arrangement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23763--23773},
}
DiT360: High-Fidelity Panoramic Image Generation via Hybrid Training: Haoran Feng,

Dizhe Zhang,

Xiangtai Li,

Bo Du,

Lu Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Haoran and Zhang, Dizhe and Li, Xiangtai and Du, Bo and Qi, Lu},
  title     = {{DiT360}: High-Fidelity Panoramic Image Generation via Hybrid Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23367--23377},
}
HTTM: Head-wise Temporal Token Merging for Faster VGGT: Weitian Wang,

Lukas Meiner,

Rai Shubham,

Cecilia De La Parra,

Akash Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Weitian and Meiner, Lukas and Shubham, Rai and De La Parra, Cecilia and Kumar, Akash},
  title     = {{HTTM}: Head-wise Temporal Token Merging for Faster {VGGT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26379--26388},
}
SenCache: Accelerating Diffusion Model Inference via Sensitivity-Aware Caching: Yasaman Haghighi,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haghighi_2026_CVPR,
  author    = {Haghighi, Yasaman and Alahi, Alexandre},
  title     = {{SenCache}: Accelerating Diffusion Model Inference via Sensitivity-Aware Caching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14295--14304},
}
PAI-Bench: A Comprehensive Benchmark For Physical AI: Fengzhe Zhou,

Jiannan Huang,

Jialuo Li,

Deva Ramanan,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Fengzhe and Huang, Jiannan and Li, Jialuo and Ramanan, Deva and Shi, Humphrey},
  title     = {{PAI-Bench}: A Comprehensive Benchmark For Physical {AI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21522--21536},
}
ArtHOI: Taming Foundation Models for Monocular 4D Reconstruction of Hand-Articulated-Object Interactions: Zikai Wang,

Zhilu Zhang,

Yiqing Wang,

Hui Li,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zikai and Zhang, Zhilu and Wang, Yiqing and Li, Hui and Zuo, Wangmeng},
  title     = {{ArtHOI}: Taming Foundation Models for Monocular {4D} Reconstruction of Hand-Articulated-Object Interactions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15998--16009},
}
QueryOcc: Query-based Self-Supervision for 3D Semantic Occupancy: Adam Lilja,

Ji Lan,

Junsheng Fu,

Lars Hammarstrand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lilja_2026_CVPR,
  author    = {Lilja, Adam and Lan, Ji and Fu, Junsheng and Hammarstrand, Lars},
  title     = {{QueryOcc}: Query-based Self-Supervision for {3D} Semantic Occupancy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21399--21408},
}
HDW-SR: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution: Chao Yang,

Boqian Zhang,

Jinghao Xu,

Guang Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Chao and Zhang, Boqian and Xu, Jinghao and Jiang, Guang},
  title     = {{HDW-SR}: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23462--23472},
}
MV-TAP: Tracking Any Point in Multi-View Videos: Jahyeok Koo,

Inès Hyeonsu Kim,

Mungyeom Kim,

Junghyun Park,

Seohyeon Park,

Jaeyeong Kim,

Jung Yi,

Seokju Cho,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koo_2026_CVPR,
  author    = {Koo, Jahyeok and Kim, In{\`e}s Hyeonsu and Kim, Mungyeom and Park, Junghyun and Park, Seohyeon and Kim, Jaeyeong and Yi, Jung and Cho, Seokju and Kim, Seungryong},
  title     = {{MV-TAP}: Tracking Any Point in Multi-View Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20932--20941},
}
MHopReg: Efficient Hierarchical Multi-Hop Graph Search for Point Cloud Registration: Yue Wu,

Feng Xiao,

Yongzhe Yuan,

Hao Li,

Kaiyuan Feng,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yue and Xiao, Feng and Yuan, Yongzhe and Li, Hao and Feng, Kaiyuan and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {{MHopReg}: Efficient Hierarchical Multi-Hop Graph Search for Point Cloud Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24217--24226},
}
EVATok: Adaptive Length Video Tokenization for Efficient Visual Autoregressive Generation: Tianwei Xiong,

Jun Hao Liew,

Zilong Huang,

Zhijie Lin,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Tianwei and Liew, Jun Hao and Huang, Zilong and Lin, Zhijie and Feng, Jiashi and Liu, Xihui},
  title     = {{EVATok}: Adaptive Length Video Tokenization for Efficient Visual Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23249--23259},
}
Self-Diffusion Driven Blind Imaging: Yanlong Yang,

Guanxiong Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yanlong and Luo, Guanxiong},
  title     = {Self-Diffusion Driven Blind Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26711--26720},
}
Delta Rectified Flow Sampling for Text-to-Image Editing: Gaspard Beaudouin,

Minghan Li,

Jaeyeon Kim,

Sung-Hoon Yoon,

Mengyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beaudouin_2026_CVPR,
  author    = {Beaudouin, Gaspard and Li, Minghan and Kim, Jaeyeon and Yoon, Sung-Hoon and Wang, Mengyu},
  title     = {Delta Rectified Flow Sampling for Text-to-Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18662--18672},
}
EgoProx: Evaluating MLLMs on Egocentric 3D Proximity Reasoning Across a Cognitive Hierarchy: Jinzhao Li,

Yinuo Chen,

Dongxu Piao,

Panwang Pan,

Yifan Yu,

Dong Wang,

Honglei Yan,

Liang Yue,

Shaofei Wang,

Yixin Chen,

Siyuan Huang,

Miao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinzhao and Chen, Yinuo and Piao, Dongxu and Pan, Panwang and Yu, Yifan and Wang, Dong and Yan, Honglei and Yue, Liang and Wang, Shaofei and Chen, Yixin and Huang, Siyuan and Liu, Miao},
  title     = {{EgoProx}: Evaluating {MLLMs} on Egocentric {3D} Proximity Reasoning Across a Cognitive Hierarchy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23751--23762},
}
ST4R-Splat: Spatio-Temporal Referring Segmentation in 4D Gaussian Splatting: Yuming Meng,

Dong Wu,

Hongbin Zha; [pdf]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Yuming and Wu, Dong and Zha, Hongbin},
  title     = {{ST4R-Splat}: Spatio-Temporal Referring Segmentation in {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17598--17608},
}
Archon: A Unified Multimodal Model for Holistic Digital Human Generation: Chong Bao,

Shichen Liu,

Lijun Yu,

David Futschik,

Stylianos Moschoglou,

Shefali Srivastava,

Ziqian Bai,

Feitong Tan,

Guofeng Zhang,

Zhaopeng Cui,

Sean Fanello,

Yinda Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Chong and Liu, Shichen and Yu, Lijun and Futschik, David and Moschoglou, Stylianos and Srivastava, Shefali and Bai, Ziqian and Tan, Feitong and Zhang, Guofeng and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda},
  title     = {Archon: A Unified Multimodal Model for Holistic Digital Human Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16464--16474},
}
Suppressing Non-Semantic Noise in Masked Image Modeling Representations: Martine Hjelkrem-Tan,

Marius Aasan,

Rwiddhi Chakraborty,

Gabriel Y. Arteaga,

Changkyu Choi,

Adín Ramírez Rivera; [pdf] [supp]
[bibtex]
@InProceedings{Hjelkrem-Tan_2026_CVPR,
  author    = {Hjelkrem-Tan, Martine and Aasan, Marius and Chakraborty, Rwiddhi and Arteaga, Gabriel Y. and Choi, Changkyu and Ram{\'\i}rez Rivera, Ad{\'\i}n},
  title     = {Suppressing Non-Semantic Noise in Masked Image Modeling Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19319--19329},
}
Harmonized Feature Conditioning and Frequency-Prompt Personalization for Multi-Rater Medical Segmentation: Sanaz Karimijafarbigloo,

Armin Khosravi,

Alireza Kheyrkhah,

Reza Azad,

Mauricio Reyes,

Dorit Merhof; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karimijafarbigloo_2026_CVPR,
  author    = {Karimijafarbigloo, Sanaz and Khosravi, Armin and Kheyrkhah, Alireza and Azad, Reza and Reyes, Mauricio and Merhof, Dorit},
  title     = {Harmonized Feature Conditioning and Frequency-Prompt Personalization for Multi-Rater Medical Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22794--22803},
}
Velox: Learning Representations of 4D Geometry and Appearance: Anagh Malik,

Dorian Chan,

Xiaoming Zhao,

David B. Lindell,

Oncel Tuzel,

Jen-Hao Rick Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Malik_2026_CVPR,
  author    = {Malik, Anagh and Chan, Dorian and Zhao, Xiaoming and Lindell, David B. and Tuzel, Oncel and Chang, Jen-Hao Rick},
  title     = {Velox: Learning Representations of {4D} Geometry and Appearance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19894--19906},
}
Aligning Text, Images and 3D Structure Token-by-Token: Aadarsh Sahoo,

Vansh Tibrewal,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sahoo_2026_CVPR,
  author    = {Sahoo, Aadarsh and Tibrewal, Vansh and Gkioxari, Georgia},
  title     = {Aligning Text, Images and {3D} Structure Token-by-Token},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14905--14914},
}
Linking Perception, Confidence and Accuracy in MLLMs: Yuetian Du,

Yucheng Wang,

Rongyu Zhang,

Zhijie Xu,

Boyu Yang,

Ming Kong,

Jie Liu,

Qiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Yuetian and Wang, Yucheng and Zhang, Rongyu and Xu, Zhijie and Yang, Boyu and Kong, Ming and Liu, Jie and Zhu, Qiang},
  title     = {Linking Perception, Confidence and Accuracy in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25914--25924},
}
Diagram2Structure: Unlocking LLMs' Diagram Comprehension through DiagramDiff, a Framework for Structuring Offline Diagrams: Haoxiang Hu,

Yaokun Li,

Zeyuan Huang,

Cangjun Gao,

Qiang He,

Qingkun Li,

Xiaoming Deng,

Cuixia Ma,

Yu-Kun Lai,

Yong-Jin Liu,

Hongan Wang; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Haoxiang and Li, Yaokun and Huang, Zeyuan and Gao, Cangjun and He, Qiang and Li, Qingkun and Deng, Xiaoming and Ma, Cuixia and Lai, Yu-Kun and Liu, Yong-Jin and Wang, Hongan},
  title     = {{Diagram2Structure}: Unlocking {LLMs}' Diagram Comprehension through {DiagramDiff}, a Framework for Structuring Offline Diagrams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24395--24404},
}
AVA-Bench: Atomic Visual Ability Benchmark for Vision Foundation Models: Zheda Mai,

Arpita Chowdhury,

Zihe Wang,

Sooyoung Jeon,

Lemeng Wang,

Jiacheng Hou,

Jihyung Kil,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Zheda and Chowdhury, Arpita and Wang, Zihe and Jeon, Sooyoung and Wang, Lemeng and Hou, Jiacheng and Kil, Jihyung and Chao, Wei-Lun},
  title     = {{AVA-Bench}: Atomic Visual Ability Benchmark for Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25925--25937},
}
MOMO: Mars Orbital MOdel Foundation Model for Mars Orbital Applications: Mirali Purohit,

Bimal Gajera,

Irish Mehta,

Bhanu Tokas,

Jacob Adler,

Steven Lu,

Scott Dickenshied,

Serina Diniega,

Brian Bue,

Umaa Rebbapragada,

Hannah Kerner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Purohit_2026_CVPR,
  author    = {Purohit, Mirali and Gajera, Bimal and Mehta, Irish and Tokas, Bhanu and Adler, Jacob and Lu, Steven and Dickenshied, Scott and Diniega, Serina and Bue, Brian and Rebbapragada, Umaa and Kerner, Hannah},
  title     = {{MOMO}: {Mars} Orbital {MOdel} Foundation Model for {Mars} Orbital Applications},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27772--27782},
}
LiDAR Prompted Spatio-Temporal Multi-View Stereo for Autonomous Driving: Qihao Sun,

Jiarun Liu,

Ziqian Ni,

Jianyun Xu,

Sheng Yang,

Tao Xie,

Lijun Zhao,

Ruifeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Qihao and Liu, Jiarun and Ni, Ziqian and Xu, Jianyun and Yang, Sheng and Xie, Tao and Zhao, Lijun and Li, Ruifeng},
  title     = {{LiDAR} Prompted Spatio-Temporal Multi-View Stereo for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14567--14577},
}
Zero-Shot Image Denoising via Hybrid Prior-Guided Pseudo Sample Generation: Xiaole Zhao,

Qingsong Pang,

Xiaobo Zhang,

Xun Xu,

Xun Gong,

Yan Yang,

Tianrui Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xiaole and Pang, Qingsong and Zhang, Xiaobo and Xu, Xun and Gong, Xun and Yang, Yan and Li, Tianrui},
  title     = {Zero-Shot Image Denoising via Hybrid Prior-Guided Pseudo Sample Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22648--22657},
}
ADSeeker: A Knowledge-Grounded Reasoning Framework for Industry Anomaly Detection and Reasoning: Kai Zhang,

Zekai Zhang,

Xihe Sun,

Anpeng Wang,

Jingmeng Nie,

Qinghui Chen,

Han Hao,

Jianyuan Guo,

Jinglin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kai and Zhang, Zekai and Sun, Xihe and Wang, Anpeng and Nie, Jingmeng and Chen, Qinghui and Hao, Han and Guo, Jianyuan and Zhang, Jinglin},
  title     = {{ADSeeker}: A Knowledge-Grounded Reasoning Framework for Industry Anomaly Detection and Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21379--21388},
}
UniLS: End-to-End Audio-Driven Avatars for Unified Listening and Speaking: Xuangeng Chu,

Ruicong Liu,

Yifei Huang,

Yun Liu,

Yichen Peng,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Xuangeng and Liu, Ruicong and Huang, Yifei and Liu, Yun and Peng, Yichen and Zheng, Bo},
  title     = {{UniLS}: End-to-End Audio-Driven Avatars for Unified Listening and Speaking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25142--25152},
}
CineScene: Implicit 3D as Effective Scene Representation for Cinematic Video Generation: Kaiyi Huang,

Yukun Huang,

Yu Li,

Jianhong Bai,

Xintao Wang,

Zinan Lin,

Xuefei Ning,

Jiwen Yu,

Yu Wang,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Kaiyi and Huang, Yukun and Li, Yu and Bai, Jianhong and Wang, Xintao and Lin, Zinan and Ning, Xuefei and Yu, Jiwen and Wang, Yu and Liu, Xihui},
  title     = {{CineScene}: Implicit {3D} as Effective Scene Representation for Cinematic Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25381--25392},
}
NIL: No-data Imitation Learning: Mert Albaba,

Chenhao Li,

Markos Diomataris,

Omid Taheri,

Andreas Krause,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Albaba_2026_CVPR,
  author    = {Albaba, Mert and Li, Chenhao and Diomataris, Markos and Taheri, Omid and Krause, Andreas and Black, Michael J.},
  title     = {{NIL}: No-data Imitation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20823--20833},
}
Humanoid Generative Pre-Training for Zero-Shot Motion Tracking: Zekun Qi,

Xuchuan Chen,

Jilong Wang,

Chenghuai Lin,

Yunrui Lian,

Wenyao Zhang,

Xinqiang Yu,

He Wang,

Li Yi; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Zekun and Chen, Xuchuan and Wang, Jilong and Lin, Chenghuai and Lian, Yunrui and Zhang, Wenyao and Yu, Xinqiang and Wang, He and Yi, Li},
  title     = {Humanoid Generative Pre-Training for Zero-Shot Motion Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20834--20844},
}
SAMIX: Reinforcing SAM2 with Semantic Adapter and Reference Selecting Policy for Mix-Supervised Segmentation: Qiang Hu,

Jiajie Wei,

Zhenyu Yi,

Zhifen Yan,

Yingjie Guo,

Hongkuan Shi,

Ge-Peng Ji,

Qiang Li,

Zhiwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Qiang and Wei, Jiajie and Yi, Zhenyu and Yan, Zhifen and Guo, Yingjie and Shi, Hongkuan and Ji, Ge-Peng and Li, Qiang and Wang, Zhiwei},
  title     = {{SAMIX}: Reinforcing {SAM2} with Semantic Adapter and Reference Selecting Policy for Mix-Supervised Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17948--17958},
}
Stochastic Ray Tracing for the Reconstruction of 3D Gaussian Splatting: Peiyu Xu,

Shuang Zhao,

Xin Sun,

Krishna Mullia,

Raymond Fei,

Iliyan Georgiev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Peiyu and Zhao, Shuang and Sun, Xin and Mullia, Krishna and Fei, Raymond and Georgiev, Iliyan},
  title     = {Stochastic Ray Tracing for the Reconstruction of {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19001--19010},
}
Uni-Encoder Meets Multi-Encoders: Representation Before Fusion for Brain Tumor Segmentation with Missing Modalities: Peibo Song,

Xiaotian Xue,

Jinshuo Zhang,

Zihao Wang,

Jinhua Liu,

Shujun Fu,

Fangxun Bao,

Si Yong Yeo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Peibo and Xue, Xiaotian and Zhang, Jinshuo and Wang, Zihao and Liu, Jinhua and Fu, Shujun and Bao, Fangxun and Yeo, Si Yong},
  title     = {Uni-Encoder Meets Multi-Encoders: Representation Before Fusion for Brain Tumor Segmentation with Missing Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15627--15638},
}
Training-free Mixed-Resolution Latent Upsampling for Spatially Accelerated Diffusion Transformers: Wongi Jeong,

Kyungryeol Lee,

Hoigi Seo,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wongi and Lee, Kyungryeol and Seo, Hoigi and Chun, Se Young},
  title     = {Training-free Mixed-Resolution Latent Upsampling for Spatially Accelerated Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18673--18682},
}
PersonaLive! Expressive Portrait Image Animation for Live Streaming: Zhiyuan Li,

Chi-Man Pun,

Chen Fang,

Jue Wang,

Xiaodong Cun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiyuan and Pun, Chi-Man and Fang, Chen and Wang, Jue and Cun, Xiaodong},
  title     = {{PersonaLive}! Expressive Portrait Image Animation for Live Streaming},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18118--18128},
}
ProM3E: Probabilistic Masked MultiModal Embedding Model for Ecology: Srikumar Sastry,

Subash Khanal,

Aayush Dhakal,

Jiayu Lin,

Dan Cher,

Phoenix Jarosz,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sastry_2026_CVPR,
  author    = {Sastry, Srikumar and Khanal, Subash and Dhakal, Aayush and Lin, Jiayu and Cher, Dan and Jarosz, Phoenix and Jacobs, Nathan},
  title     = {{ProM3E}: Probabilistic Masked {MultiModal} Embedding Model for Ecology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20564--20574},
}
FUN REC * Reconstructing Functional 3D Scenes from Egocentric Interaction Videos: Alexandros Delitzas,

Chenyangguang Zhang,

Alexey Gavryushin,

Tommaso Di Mario,

Boyang Sun,

Rishabh Dabral,

Leonidas Guibas,

Christian Theobalt,

Marc Pollefeys,

Francis Engelmann,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Delitzas_2026_CVPR,
  author    = {Delitzas, Alexandros and Zhang, Chenyangguang and Gavryushin, Alexey and Di Mario, Tommaso and Sun, Boyang and Dabral, Rishabh and Guibas, Leonidas and Theobalt, Christian and Pollefeys, Marc and Engelmann, Francis and Barath, Daniel},
  title     = {{FUN REC} * Reconstructing Functional {3D} Scenes from Egocentric Interaction Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28446--28457},
}
Revisiting Unknowns: Towards Effective and Efficient Open-Set Active Learning: Chen-Chen Zong,

Yu-Qi Chi,

Xie-Yang Wang,

Yan Cui,

Sheng-Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2026_CVPR,
  author    = {Zong, Chen-Chen and Chi, Yu-Qi and Wang, Xie-Yang and Cui, Yan and Huang, Sheng-Jun},
  title     = {Revisiting Unknowns: Towards Effective and Efficient Open-Set Active Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17756--17765},
}
MoE-GRPO: Optimizing Mixture-of-Experts via Reinforcement Learning in Vision-Language Models: Dohwan Ko,

Jinyoung Park,

Seoung Choi,

Sanghyeok Lee,

Seohyun Lee,

Hyunwoo J. Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Dohwan and Park, Jinyoung and Choi, Seoung and Lee, Sanghyeok and Lee, Seohyun and Kim, Hyunwoo J.},
  title     = {{MoE-GRPO}: Optimizing Mixture-of-Experts via Reinforcement Learning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14957--14967},
}
Solving Minimal Problems Without Matrix Inversion Using FFT-Based Interpolation: Haidong Wu,

Snehal Bhayani,

Janne Heikkila; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haidong and Bhayani, Snehal and Heikkila, Janne},
  title     = {Solving Minimal Problems Without Matrix Inversion Using {FFT-Based} Interpolation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19771--19780},
}
Parameter-Efficient Semantic Augmentation for Enhancing Open-Vocabulary Object Detection: Weihao Cao,

Runqi Wang,

Xiaoyue Duan,

Jinchao Zhang,

Ang Yang,

Liping Jing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Weihao and Wang, Runqi and Duan, Xiaoyue and Zhang, Jinchao and Yang, Ang and Jing, Liping},
  title     = {Parameter-Efficient Semantic Augmentation for Enhancing Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20367--20376},
}
RegionFuse: Region-Adaptive Pixel Distribution Learning for Infrared and Visible Image Fusion: Jianghan Xia,

Hong Song,

Jinfu Li,

Yucong Lin,

Shihan Ma,

Jingfan Fan,

Danni Ai,

Tianyu Fu,

Deqiang Xiao,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Jianghan and Song, Hong and Li, Jinfu and Lin, Yucong and Ma, Shihan and Fan, Jingfan and Ai, Danni and Fu, Tianyu and Xiao, Deqiang and Yang, Jian},
  title     = {{RegionFuse}: Region-Adaptive Pixel Distribution Learning for Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19539--19548},
}
Task-Driven Implicit Representations for Automated Design of LiDAR Systems: Nikhil Behari,

Aaron Young,

Tzofi Klinghoffer,

Akshat Dave,

Ramesh Raskar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behari_2026_CVPR,
  author    = {Behari, Nikhil and Young, Aaron and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh},
  title     = {Task-Driven Implicit Representations for Automated Design of {LiDAR} Systems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24248--24257},
}
Proxy3D: Efficient 3D Representations for Vision-Language Models via Semantic Clustering and Alignment: Jerry Jiang,

Haowen Sun,

Denis Gudovskiy,

Yohei Nakata,

Tomoyuki Okuno,

Kurt Keutzer,

Wenzhao Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Jerry and Sun, Haowen and Gudovskiy, Denis and Nakata, Yohei and Okuno, Tomoyuki and Keutzer, Kurt and Zheng, Wenzhao},
  title     = {{Proxy3D}: Efficient {3D} Representations for Vision-Language Models via Semantic Clustering and Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23816--23825},
}
MV3DIS: Multi-View Mask Matching via 3D Guides for Zero-Shot 3D Instance Segmentation: Yibo Zhao,

Yigong Zhang,

Jin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yibo and Zhang, Yigong and Xie, Jin},
  title     = {{MV3DIS}: Multi-View Mask Matching via {3D} Guides for Zero-Shot {3D} Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17916--17926},
}
InvAD: Inversion-based Reconstruction-Free Anomaly Detection with Diffusion Models: Shunsuke Sakai,

Xiangteng He,

Chunzhi Gu,

Leonid Sigal,

Tatsuhito Hasegawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sakai_2026_CVPR,
  author    = {Sakai, Shunsuke and He, Xiangteng and Gu, Chunzhi and Sigal, Leonid and Hasegawa, Tatsuhito},
  title     = {{InvAD}: Inversion-based Reconstruction-Free Anomaly Detection with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21389--21398},
}
PHASE-Net: Physics-Grounded Harmonic Attention System for Efficient Remote Photoplethysmography Measurement: Bo Zhao,

Dan Guo,

Junzhe Cao,

Yong Xu,

Bochao Zou,

Tao Tan,

Yue Sun,

Zitong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Bo and Guo, Dan and Cao, Junzhe and Xu, Yong and Zou, Bochao and Tan, Tao and Sun, Yue and Yu, Zitong},
  title     = {{PHASE-Net}: Physics-Grounded Harmonic Attention System for Efficient Remote Photoplethysmography Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21198--21207},
}
OmniZip: Audio-Guided Dynamic Token Compression for Fast Omnimodal Large Language Models: Keda Tao,

Kele Shao,

Bohan Yu,

Weiqiang Wang,

Jian Liu,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Keda and Shao, Kele and Yu, Bohan and Wang, Weiqiang and Liu, Jian and Wang, Huan},
  title     = {{OmniZip}: Audio-Guided Dynamic Token Compression for Fast Omnimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17682--17692},
}
TerraScope: Pixel-Grounded Visual Reasoning for Earth Observation: Yan Shu,

Bin Ren,

Zhitong Xiong,

Xiao Xiang Zhu,

Begüm Demir,

Nicu Sebe,

Paolo Rota; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shu_2026_CVPR,
  author    = {Shu, Yan and Ren, Bin and Xiong, Zhitong and Zhu, Xiao Xiang and Demir, Beg{\"u}m and Sebe, Nicu and Rota, Paolo},
  title     = {{TerraScope}: Pixel-Grounded Visual Reasoning for {Earth} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16712--16722},
}
FedMPT: Federated Multi-Label Prompt Tuning of Vision-Language Models: Xucong Wang,

Pengkun Wang,

Zhe Zhao,

Liheng Yu,

Shuang Wang,

Yang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Wang, Shuang and Wang, Yang},
  title     = {{FedMPT}: Federated Multi-Label Prompt Tuning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17226--17236},
}
Fine-Grained GRPO for Precise Preference Alignment in Flow Models: Yujie Zhou,

Pengyang Ling,

Jiazi Bu,

Yibin Wang,

Yuhang Zang,

Jiaqi Wang,

Li Niu,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yujie and Ling, Pengyang and Bu, Jiazi and Wang, Yibin and Zang, Yuhang and Wang, Jiaqi and Niu, Li and Zhai, Guangtao},
  title     = {Fine-Grained {GRPO} for Precise Preference Alignment in Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20045--20054},
}
Attribute-Preserving Pseudo-Labeling for Diffusion-Based Face Swapping: Jiwon Kang,

Yeji Choi,

JoungBin Lee,

Wooseok Jang,

Jinhyeok Choi,

Taekeun Kang,

Yongjae Park,

Myungin Kim,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jiwon and Choi, Yeji and Lee, JoungBin and Jang, Wooseok and Choi, Jinhyeok and Kang, Taekeun and Park, Yongjae and Kim, Myungin and Kim, Seungryong},
  title     = {Attribute-Preserving Pseudo-Labeling for Diffusion-Based Face Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18651--18661},
}
Dynamic Token Reweighting for Robust Vision-Language Models: Tanqiu Jiang,

Jiacheng Liang,

Rongyi Zhu,

Jiawei Zhou,

Fenglong Ma,

Ting Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Tanqiu and Liang, Jiacheng and Zhu, Rongyi and Zhou, Jiawei and Ma, Fenglong and Wang, Ting},
  title     = {Dynamic Token Reweighting for Robust Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24481--24491},
}
Chorus: Multi-Teacher Pretraining for Holistic 3D Gaussian Scene Encoding: Yue Li,

Qi Ma,

Runyi Yang,

Mengjiao Ma,

Bin Ren,

Nikola Popovic,

Nicu Sebe,

Theo Gevers,

Luc Van Gool,

Danda Pani Paudel,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yue and Ma, Qi and Yang, Runyi and Ma, Mengjiao and Ren, Bin and Popovic, Nikola and Sebe, Nicu and Gevers, Theo and Van Gool, Luc and Paudel, Danda Pani and Oswald, Martin R.},
  title     = {Chorus: Multi-Teacher Pretraining for Holistic {3D} {Gaussian} Scene Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21431--21442},
}
Portable Active Learning for Object Detection: Rashi Sharma,

Justin Timothy C. Bersamin,

Karthikk Subramanian; [pdf] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Rashi and Bersamin, Justin Timothy C. and Subramanian, Karthikk},
  title     = {Portable Active Learning for Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25514--25523},
}
LASAR: Towards Spatio-temporal Reasoning with Latent Cognitive Map: Jinzhou Tang,

Sidi Liu,

Waikit Xiu,

Weixing Chen,

Keze Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jinzhou and Liu, Sidi and Xiu, Waikit and Chen, Weixing and Wang, Keze},
  title     = {{LASAR}: Towards Spatio-temporal Reasoning with Latent Cognitive Map},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23880--23890},
}
FARMER: Flow AutoRegressive Transformer over Pixels: Guangting Zheng,

Qinyu Zhao,

Tao Yang,

Fei Xiao,

Zhijie Lin,

Jie Wu,

Jiajun Deng,

Yanyong Zhang,

Rui Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Guangting and Zhao, Qinyu and Yang, Tao and Xiao, Fei and Lin, Zhijie and Wu, Jie and Deng, Jiajun and Zhang, Yanyong and Zhu, Rui},
  title     = {{FARMER}: Flow {AutoRegressive} Transformer over Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25730--25741},
}
IBISAgent: Reinforcing Pixel-Level Visual Reasoning in MLLMs for Universal Biomedical Object Referring and Segmentation: Yankai Jiang,

Qiaoru Li,

Binlu Xu,

Haoran Sun,

Chao Ding,

Junting Dong,

Yuxiang Cai,

Xuhong Zhang,

Jianwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yankai and Li, Qiaoru and Xu, Binlu and Sun, Haoran and Ding, Chao and Dong, Junting and Cai, Yuxiang and Zhang, Xuhong and Yin, Jianwei},
  title     = {{IBISAgent}: Reinforcing Pixel-Level Visual Reasoning in {MLLMs} for Universal Biomedical Object Referring and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20996--21005},
}
CCCaption: Dual-Reward Reinforcement Learning for Complete and Correct Image Captioning: Zhijiang Tang,

Linhua Wang,

Jiaxin Qi,

Weihao Jiang,

Peng Hou,

Anxiang Zeng,

Jianqiang Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhijiang and Wang, Linhua and Qi, Jiaxin and Jiang, Weihao and Hou, Peng and Zeng, Anxiang and Huang, Jianqiang},
  title     = {{CCCaption}: Dual-Reward Reinforcement Learning for Complete and Correct Image Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22153--22163},
}
LinVideo: A Post-Training Framework towards O(n) Attention in Efficient Video Generation: Yushi Huang,

Xingtong Ge,

Ruihao Gong,

Chengtao Lv,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yushi and Ge, Xingtong and Gong, Ruihao and Lv, Chengtao and Zhang, Jun},
  title     = {{LinVideo}: A Post-Training Framework towards {O(n)} Attention in Efficient Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23398--23408},
}
Unifying Perception and Action: A Hybrid-Modality Pipeline with Implicit Visual Chain-of-Thought for Robotic Action Generation: Xiangkai Ma,

Lekai Xing,

Han Zhang,

Wenzhong Li,

Sanglu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xiangkai and Xing, Lekai and Zhang, Han and Li, Wenzhong and Lu, Sanglu},
  title     = {Unifying Perception and Action: A Hybrid-Modality Pipeline with Implicit Visual Chain-of-Thought for Robotic Action Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22380--22390},
}
VOLD: Reasoning Transfer from LLMs to Vision-Language Models via On-Policy Distillation: Walid Bousselham,

Hilde Kuehne,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bousselham_2026_CVPR,
  author    = {Bousselham, Walid and Kuehne, Hilde and Schmid, Cordelia},
  title     = {{VOLD}: Reasoning Transfer from {LLMs} to Vision-Language Models via On-Policy Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26209--26218},
}
Scaling-Aware Data Selection for End-to-End Autonomous Driving Systems: Tolga Dimlioglu,

Nadine Chang,

Maying Shen,

Rafid Mahmood,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dimlioglu_2026_CVPR,
  author    = {Dimlioglu, Tolga and Chang, Nadine and Shen, Maying and Mahmood, Rafid and Alvarez, Jose M.},
  title     = {Scaling-Aware Data Selection for End-to-End Autonomous Driving Systems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17798--17808},
}
A Faster Path to Continual Learning: Wei Li,

Hangjie Yuan,

Zixiang Zhao,

Borui Kang,

Ziwei Liu,

Tao Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wei and Yuan, Hangjie and Zhao, Zixiang and Kang, Borui and Liu, Ziwei and Feng, Tao},
  title     = {A Faster Path to Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25088--25098},
}
PaQ-DETR: Learning Pattern and Quality-Aware Dynamic Queries for Object Detection: Zhengjian Kang,

Jun Zhuang,

Kangtong Mo,

Qi Chen,

Rui Liu,

Ye Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Zhengjian and Zhuang, Jun and Mo, Kangtong and Chen, Qi and Liu, Rui and Zhang, Ye},
  title     = {{PaQ-DETR}: Learning Pattern and Quality-Aware Dynamic Queries for Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25504--25513},
}
VCU-Bridge: Hierarchical Visual Connotation Understanding via Semantic Bridging: Ming Zhong,

Yuanlei Wang,

Liuzhou Zhang,

Ruichuan An,

Renrui Zhang,

Hao Liang,

Ming Lu,

Ying Shen,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Ming and Wang, Yuanlei and Zhang, Liuzhou and An, Ruichuan and Zhang, Renrui and Liang, Hao and Lu, Ming and Shen, Ying and Zhang, Wentao},
  title     = {{VCU-Bridge}: Hierarchical Visual Connotation Understanding via Semantic Bridging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26187--26197},
}
Seeing What Matters: Visual Preference Policy Optimization for Visual Generation: Ziqi Ni,

Yuanzhi Liang,

Rui Li,

Yi Zhou,

Haibin Huang,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Ziqi and Liang, Yuanzhi and Li, Rui and Zhou, Yi and Huang, Haibin and Zhang, Chi and Li, Xuelong},
  title     = {Seeing What Matters: Visual Preference Policy Optimization for Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27260--27269},
}
When Transformers Meet Mamba: A Hybrid Transformer-Mamba Network for Video Object Detection: Qiang Qi,

Xiao Wang,

Zongyuan Du,

Yu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Qiang and Wang, Xiao and Du, Zongyuan and Zhang, Yu},
  title     = {When Transformers Meet {Mamba}: A Hybrid {Transformer-Mamba} Network for Video Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18492--18502},
}
HorizonForge: Driving Scene Editing with Any Trajectories and Any Vehicles: Yifan Wang,

Francesco Pittaluga,

Zaid Tasneem,

Chenyu You,

Manmohan Chandraker,

Ziyu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifan and Pittaluga, Francesco and Tasneem, Zaid and You, Chenyu and Chandraker, Manmohan and Jiang, Ziyu},
  title     = {{HorizonForge}: Driving Scene Editing with Any Trajectories and Any Vehicles},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24895--24905},
}
EMAD: Evidence-Centric Grounded Multimodal Diagnosis for Alzheimer's Disease: Qiuhui Chen,

Xuancheng Yao,

Zhenglei Zhou,

Xinyue Hu,

Yi Hong; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Qiuhui and Yao, Xuancheng and Zhou, Zhenglei and Hu, Xinyue and Hong, Yi},
  title     = {{EMAD}: Evidence-Centric Grounded Multimodal Diagnosis for {Alzheimer's} Disease},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23031--23040},
}
Activation Matters: Test-time Activated Negative Labels for OOD Detection with Vision-Language Models: Yabin Zhang,

Maya Varma,

Yunhe Gao,

Jean-Benoit Delbrouck,

Jiaming Liu,

Chong Wang,

Curtis Langlotz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yabin and Varma, Maya and Gao, Yunhe and Delbrouck, Jean-Benoit and Liu, Jiaming and Wang, Chong and Langlotz, Curtis},
  title     = {Activation Matters: Test-time Activated Negative Labels for {OOD} Detection with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17462--17473},
}
FisherPoser: Human Motion Estimation from Sparse Observations with Hierarchical Region-Wise Fisher-Matrix Uncertainty Modeling: Songpengcheng Xia,

Qingyu Zhang,

Zhuo Su,

Jiarui Yang,

Zengyuan Lai,

Qi Wu,

Ling Pei; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Songpengcheng and Zhang, Qingyu and Su, Zhuo and Yang, Jiarui and Lai, Zengyuan and Wu, Qi and Pei, Ling},
  title     = {{FisherPoser}: Human Motion Estimation from Sparse Observations with Hierarchical Region-Wise {Fisher-Matrix} Uncertainty Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28413--28423},
}
Action Motifs: Self-Supervised Hierarchical Representation of Human Body Movements: Genki Kinoshita,

Shu Nakamura,

Ryo Kawahara,

Shohei Nobuhara,

Yasutomo Kawanishi,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kinoshita_2026_CVPR,
  author    = {Kinoshita, Genki and Nakamura, Shu and Kawahara, Ryo and Nobuhara, Shohei and Kawanishi, Yasutomo and Nishino, Ko},
  title     = {Action Motifs: Self-Supervised Hierarchical Representation of Human Body Movements},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20139--20148},
}
S2FT: Parameter-Efficient Fine-Tuning in Sparse Spectrum Domain: Baoquan Zhang,

Zhehao Yu,

Lisai Zhang,

Kenghong Lin,

Tianran Chen,

Yuxi Sun,

Yunming Ye,

Yao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Baoquan and Yu, Zhehao and Zhang, Lisai and Lin, Kenghong and Chen, Tianran and Sun, Yuxi and Ye, Yunming and He, Yao},
  title     = {{S2FT}: Parameter-Efficient Fine-Tuning in Sparse Spectrum Domain},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20191--20201},
}
DarkAct: A RGB-Thermal Dataset and Fusion Framework for Multimodal Low-Light Action Recognition: Yuanjun Tan,

Aoran Xiao,

Liqian Deng,

Zhigang Tu; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Yuanjun and Xiao, Aoran and Deng, Liqian and Tu, Zhigang},
  title     = {{DarkAct}: A {RGB-Thermal} Dataset and Fusion Framework for Multimodal Low-Light Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27345--27356},
}
PAF: Perturbation-Aware Filtering for Open-Set Semi-Supervised Learning: Yinan Han,

Qing-Yuan Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Yinan and Jiang, Qing-Yuan},
  title     = {{PAF}: Perturbation-Aware Filtering for Open-Set Semi-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24803--24812},
}
M3Grounder: Mask-Based Multi-Span and Multi-Granular Grounding for Document QA: Venkata Kesav Venna,

Sai Madhusudan Gunda,

Jyothi Swaroopa Jinka,

Hrithik Sagar Rachakonda,

Anirudh Srinivasan,

Ravi Kiran Sarvadevabhatla; [pdf] [supp]
[bibtex]
@InProceedings{Venna_2026_CVPR,
  author    = {Venna, Venkata Kesav and Gunda, Sai Madhusudan and Jinka, Jyothi Swaroopa and Rachakonda, Hrithik Sagar and Srinivasan, Anirudh and Sarvadevabhatla, Ravi Kiran},
  title     = {{M3Grounder}: Mask-Based Multi-Span and Multi-Granular Grounding for Document {QA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23685--23695},
}
Towards Persistence: Learning Topological Constraints for Event-based Small Object Detection: Shiman He,

Nuo Chen,

Xinyi Ying,

Yihang Luo,

Yangsi Shi,

Zaiping Lin,

Miao Li; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Shiman and Chen, Nuo and Ying, Xinyi and Luo, Yihang and Shi, Yangsi and Lin, Zaiping and Li, Miao},
  title     = {Towards Persistence: Learning Topological Constraints for Event-based Small Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22291--22300},
}
RF4D:Neural Radar Fields for Novel View Synthesis in Outdoor Dynamic Scenes: Jiarui Zhang,

Zhihao Li,

Chong Wang,

Bihan Wen; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiarui and Li, Zhihao and Wang, Chong and Wen, Bihan},
  title     = {{RF4D}:Neural Radar Fields for Novel View Synthesis in Outdoor Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15387--15397},
}
Negative Binomial Variational Autoencoders for Overdispersed Latent Modeling: Yixuan Zhang,

Jinhao Sheng,

Wenxin Zhang,

Quyu Kong,

Feng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yixuan and Sheng, Jinhao and Zhang, Wenxin and Kong, Quyu and Zhou, Feng},
  title     = {Negative Binomial Variational Autoencoders for Overdispersed Latent Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16289--16298},
}
Camouflage-aware Image-Text Retrieval via Expert Collaboration: Yao Jiang,

Zhongkuan Mao,

Xuan Wu,

Keren Fu,

Qijun Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yao and Mao, Zhongkuan and Wu, Xuan and Fu, Keren and Zhao, Qijun},
  title     = {Camouflage-aware Image-Text Retrieval via Expert Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23933--23943},
}
AirSim360: A Panoramic Simulation Platform within Drone View: Xian Ge,

Yuling Pan,

Yuhang Zhang,

Xiang Li,

Weijun Zhang,

Dizhe Zhang,

Zhaoliang Wan,

Xin Lin,

Xiangkai Zhang,

Juntao Liang,

Xiangtai Li,

WenJie Jiang,

Bo Du,

Ming-Hsuan Yang,

Lu Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Xian and Pan, Yuling and Zhang, Yuhang and Li, Xiang and Zhang, Weijun and Zhang, Dizhe and Wan, Zhaoliang and Lin, Xin and Zhang, Xiangkai and Liang, Juntao and Li, Xiangtai and Jiang, WenJie and Du, Bo and Yang, Ming-Hsuan and Qi, Lu},
  title     = {{AirSim360}: A Panoramic Simulation Platform within Drone View},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26931--26940},
}
Rethinking Position Embedding as a Context Controller for Multi-Reference and Multi-Shot Video Generation: Binyuan Huang,

Yuning Lu,

Weinan Jia,

Hualiang Wang,

Mu Liu,

Daiqing Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Binyuan and Lu, Yuning and Jia, Weinan and Wang, Hualiang and Liu, Mu and Yang, Daiqing},
  title     = {Rethinking Position Embedding as a Context Controller for Multi-Reference and Multi-Shot Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23304--23313},
}
ShadowDraw: From Any Object to Shadow-Drawing Compositional Art: Rundong Luo,

Noah Snavely,

Wei-Chiu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Rundong and Snavely, Noah and Ma, Wei-Chiu},
  title     = {{ShadowDraw}: From Any Object to Shadow-Drawing Compositional Art},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24428--24437},
}
Uni-DAD: Unified Distillation and Adaptation of Diffusion Models for Few-step Few-shot Image Generation: Yara Bahram,

Mélodie Desbos,

Mohammadhadi Shateri,

Eric Granger; [pdf] [arXiv]
[bibtex]
@InProceedings{Bahram_2026_CVPR,
  author    = {Bahram, Yara and Desbos, M{\'e}lodie and Shateri, Mohammadhadi and Granger, Eric},
  title     = {{Uni-DAD}: Unified Distillation and Adaptation of Diffusion Models for Few-step Few-shot Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26635--26645},
}
AE2VID: Event-based Video Reconstruction via Aperture Modulation: Chenxu Bai,

Boyu Li,

Peiqi Duan,

Xinyu Zhou,

Hanyue Lou,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Chenxu and Li, Boyu and Duan, Peiqi and Zhou, Xinyu and Lou, Hanyue and Shi, Boxin},
  title     = {{AE2VID}: Event-based Video Reconstruction via Aperture Modulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15115--15124},
}
Verifying Neural Network Robustness with Dual Perturbations: Hai Duong,

Lam Nguyen,

Thanh Le,

ThanhVu Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Duong_2026_CVPR,
  author    = {Duong, Hai and Nguyen, Lam and Le, Thanh and Nguyen, ThanhVu},
  title     = {Verifying Neural Network Robustness with Dual Perturbations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27916--27925},
}
Event-Illumination Collaborative Low-light Image Enhancement with a High-resolution Real-world Dataset: Senyan Xu,

Zhijing Sun,

Kean Liu,

Xin Lu,

Ruixuan Jiang,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Senyan and Sun, Zhijing and Liu, Kean and Lu, Xin and Jiang, Ruixuan and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Event-Illumination Collaborative Low-light Image Enhancement with a High-resolution Real-world Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22270--22280},
}
Defending Unauthorized Model Merging via Dual-Stage Weight Protection: Wei-Jia Chen,

Min-Yan Tsai,

Cheng-Yi Lee,

Chia-Mu Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wei-Jia and Tsai, Min-Yan and Lee, Cheng-Yi and Yu, Chia-Mu},
  title     = {Defending Unauthorized Model Merging via Dual-Stage Weight Protection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27926--27935},
}
ThinkGen: Generalized Thinking for Visual Generation: Siyu Jiao,

Yiheng Lin,

Yujie Zhong,

Qi She,

Wei Zhou,

Xiaohan Lan,

Zilong Huang,

Fei Yu,

Yingchen Yu,

Yunqing Zhao,

Yao Zhao,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Siyu and Lin, Yiheng and Zhong, Yujie and She, Qi and Zhou, Wei and Lan, Xiaohan and Huang, Zilong and Yu, Fei and Yu, Yingchen and Zhao, Yunqing and Zhao, Yao and Wei, Yunchao},
  title     = {{ThinkGen}: Generalized Thinking for Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14713--14723},
}
MoEActok: A MoE-based Action Tokenizer for Vision-Language-Action Models: Chunpu Xu,

Zhixuan Liang,

Tianshuo Yang,

Chi-Min Chan,

Yang Xiao,

Jessie Wang,

Xiaokang Yang,

Yao Mu; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chunpu and Liang, Zhixuan and Yang, Tianshuo and Chan, Chi-Min and Xiao, Yang and Wang, Jessie and Yang, Xiaokang and Mu, Yao},
  title     = {{MoEActok}: A {MoE-based} Action Tokenizer for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28042--28051},
}
ORCA: Orchestrated Reasoning with Collaborative Agents for Document Visual Question Answering: Aymen Lassoued,

Mohamed Ali Souibgui,

Yousri Kessentini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lassoued_2026_CVPR,
  author    = {Lassoued, Aymen and Souibgui, Mohamed Ali and Kessentini, Yousri},
  title     = {{ORCA}: Orchestrated Reasoning with Collaborative Agents for Document Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19475--19486},
}
Improving Calibration in Test-Time Prompt Tuning for Vision-Language Models via Data-Free Flatness-Aware Prompt Pretraining: Hyeonseo Jang,

Jaebyeong Jeon,

Joong-Won Hwang,

Kibok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Hyeonseo and Jeon, Jaebyeong and Hwang, Joong-Won and Lee, Kibok},
  title     = {Improving Calibration in Test-Time Prompt Tuning for Vision-Language Models via Data-Free Flatness-Aware Prompt Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24300--24309},
}
Cross-Domain Demo-to-Code via Neurosymbolic Counterfactual Reasoning: Jooyoung Kim,

Wonje Choi,

Younguk Song,

Honguk Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jooyoung and Choi, Wonje and Song, Younguk and Woo, Honguk},
  title     = {Cross-Domain Demo-to-Code via Neurosymbolic Counterfactual Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18848--18858},
}
Aligning What Vision-Language Models See and Perceive with Adaptive Information Flow: Chengxin Liu,

Wonseok Choi,

Chenshuang Zhang,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chengxin and Choi, Wonseok and Zhang, Chenshuang and Oh, Tae-Hyun},
  title     = {Aligning What Vision-Language Models See and Perceive with Adaptive Information Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24706--24715},
}
Symphony: A Cognitively-Inspired Multi-Agent System for Long-Video Understanding: Haiyang Yan,

Hongyun Zhou,

Peng Xu,

Xiaoxue Feng,

Mengyi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haiyang and Zhou, Hongyun and Xu, Peng and Feng, Xiaoxue and Liu, Mengyi},
  title     = {Symphony: A Cognitively-Inspired Multi-Agent System for Long-Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24031--24041},
}
MoCha: End-to-End Video Character Replacement without Structural Guidance: Zhengbo Xu,

Jie Ma,

Ziheng Wang,

Zhan Peng,

Jun Liang,

Jing Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengbo and Ma, Jie and Wang, Ziheng and Peng, Zhan and Liang, Jun and Li, Jing},
  title     = {{MoCha}: End-to-End Video Character Replacement without Structural Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16279--16288},
}
UniVBench: Towards Unified Evaluation for Video Foundation Models: Jianhui Wei,

Xiaotian Zhang,

Yichen Li,

Yuan Wang,

Yan Zhang,

Ziyi Chen,

Zhihang Tang,

Wei Xu,

Zuozhu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jianhui and Zhang, Xiaotian and Li, Yichen and Wang, Yuan and Zhang, Yan and Chen, Ziyi and Tang, Zhihang and Xu, Wei and Liu, Zuozhu},
  title     = {{UniVBench}: Towards Unified Evaluation for Video Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25654--25666},
}
CoVFT: Context-aware Visual Fine-tuning for Multimodal Large Language Models: Nan Zhou,

Huiqun Wang,

Yaoyan Zheng,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Nan and Wang, Huiqun and Zheng, Yaoyan and Huang, Di},
  title     = {{CoVFT}: Context-aware Visual Fine-tuning for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24341--24351},
}
Faster-GS: Analyzing and Improving Gaussian Splatting Optimization: Florian Hahlbohm,

Linus Franke,

Martin Eisemann,

Marcus Magnor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hahlbohm_2026_CVPR,
  author    = {Hahlbohm, Florian and Franke, Linus and Eisemann, Martin and Magnor, Marcus},
  title     = {{Faster-GS}: Analyzing and Improving {Gaussian} Splatting Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18946--18957},
}
PROMO: Promptable Outfitting for Efficient High-Fidelity Virtual Try-On: Haohua Chen,

Tianze Zhou,

Wei Zhu,

Runqi Wang,

Yandong Guan,

Dejia Song,

Yibo Chen,

Xu Tang,

Yao Hu,

Lu Sheng,

Zhiyong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haohua and Zhou, Tianze and Zhu, Wei and Wang, Runqi and Guan, Yandong and Song, Dejia and Chen, Yibo and Tang, Xu and Hu, Yao and Sheng, Lu and Wu, Zhiyong},
  title     = {{PROMO}: Promptable Outfitting for Efficient High-Fidelity Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16074--16084},
}
Few-shot Acoustic Synthesis with Multimodal Flow Matching: Amandine Brunetto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brunetto_2026_CVPR,
  author    = {Brunetto, Amandine},
  title     = {Few-shot Acoustic Synthesis with Multimodal Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15773--15783},
}
Anatomica: Localized Control over Geometric and Topological Properties for Anatomical Diffusion Models: Karim Kadry,

Abdalla Abdelwahed,

Ajay Manicka,

Naravich Chutisilp,

Farhad R. Nezami,

Elazer R. Edelman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kadry_2026_CVPR,
  author    = {Kadry, Karim and Abdelwahed, Abdalla and Manicka, Ajay and Chutisilp, Naravich and Nezami, Farhad R. and Edelman, Elazer R.},
  title     = {Anatomica: Localized Control over Geometric and Topological Properties for Anatomical Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15595--15605},
}
UniCompress: Token Compression for Unified Vision-Language Understanding and Generation: Ziyao Wang,

Chen Chen,

Jingtao Li,

Weiming Zhuang,

Jiabo Huang,

Ang Li,

Lingjuan Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyao and Chen, Chen and Li, Jingtao and Zhuang, Weiming and Huang, Jiabo and Li, Ang and Lyu, Lingjuan},
  title     = {{UniCompress}: Token Compression for Unified Vision-Language Understanding and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24663--24674},
}
MooCap: A Multi-View Benchmark for Cow-Object-Human Interaction and Behavior Dynamics: Ian Noronha,

Heather Neave,

Upinder Kaur; [pdf] [supp]
[bibtex]
@InProceedings{Noronha_2026_CVPR,
  author    = {Noronha, Ian and Neave, Heather and Kaur, Upinder},
  title     = {{MooCap}: A Multi-View Benchmark for Cow-Object-Human Interaction and Behavior Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27324--27333},
}
AURA: Multi-modal Shared Autonomy for Urban Navigation: Yukai Ma,

Honglin He,

Selina Song,

Wayne Wu,

Bolei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yukai and He, Honglin and Song, Selina and Wu, Wayne and Zhou, Bolei},
  title     = {{AURA}: Multi-modal Shared Autonomy for Urban Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18171--18181},
}
Geometry-Aligned and Anomaly-Aware Reconstruction for 3D Anomaly Detection: Linchun Wu,

Qin Zou,

Yuanhao Yue,

Zhongyuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Linchun and Zou, Qin and Yue, Yuanhao and Wang, Zhongyuan},
  title     = {Geometry-Aligned and Anomaly-Aware Reconstruction for {3D} Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14648--14657},
}
CogDriver: Integrating Cognitive Inertia for Temporally Coherent Planning in Autonomous Driving: Pei Liu,

Qingtian Ning,

Xinyan Lu,

Haipeng Liu,

Weiliang Ma,

Dangen She,

Xianpeng Lang,

Jun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Pei and Ning, Qingtian and Lu, Xinyan and Liu, Haipeng and Ma, Weiliang and She, Dangen and Lang, Xianpeng and Ma, Jun},
  title     = {{CogDriver}: Integrating Cognitive Inertia for Temporally Coherent Planning in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18150--18160},
}
WISER: Wider Search, Deeper Thinking, and Adaptive Fusion for Training-Free Zero-Shot Composed Image Retrieval: Tianyue Wang,

Leigang Qu,

Tianyu Yang,

Xiangzhao Hao,

Yifan Xu,

Haiyun Guo,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Tianyue and Qu, Leigang and Yang, Tianyu and Hao, Xiangzhao and Xu, Yifan and Guo, Haiyun and Wang, Jinqiao},
  title     = {{WISER}: Wider Search, Deeper Thinking, and Adaptive Fusion for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16865--16875},
}
TraceGen: World Modeling in 3D Trace Space Enables Learning from Cross-Embodiment Videos: Seungjae Lee,

Yoonkyo Jung,

Inkook Chun,

Yao-Chih Lee,

Zikui Cai,

Hongjia Huang,

Aayush Talreja,

Tan Dao,

Yongyuan Liang,

Jia-Bin Huang,

Furong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seungjae and Jung, Yoonkyo and Chun, Inkook and Lee, Yao-Chih and Cai, Zikui and Huang, Hongjia and Talreja, Aayush and Dao, Tan and Liang, Yongyuan and Huang, Jia-Bin and Huang, Furong},
  title     = {{TraceGen}: World Modeling in {3D} Trace Space Enables Learning from Cross-Embodiment Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20721--20731},
}
Frequency Switching Mechanism for Parameter-Efficient Multi-Task Learning: Shih-Wen Liu,

Yen-Chang Chen,

Wei-Ta Chu,

Fu-En Yang,

Yu-Chiang Frank Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shih-Wen and Chen, Yen-Chang and Chu, Wei-Ta and Yang, Fu-En and Wang, Yu-Chiang Frank},
  title     = {Frequency Switching Mechanism for Parameter-Efficient Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20273--20282},
}
FluidGaussian: Propagating Simulation-Based Uncertainty Toward Functionally-Intelligent 3D Reconstruction: Yuqiu Liu,

Jialin Song,

Marissa Ramirez de Chanlatte,

Rochishnu Chowdhury,

Rushil Paresh Desai,

Wuyang Chen,

Daniel Martin,

Michael W. Mahoney; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuqiu and Song, Jialin and de Chanlatte, Marissa Ramirez and Chowdhury, Rochishnu and Desai, Rushil Paresh and Chen, Wuyang and Martin, Daniel and Mahoney, Michael W.},
  title     = {{FluidGaussian}: Propagating Simulation-Based Uncertainty Toward Functionally-Intelligent {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15421--15431},
}
CVA: Context-aware Video-text Alignment for Video Temporal Grounding: Sungho Moon,

Seunghun Lee,

Jiwan Seo,

Sunghoon Im; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Sungho and Lee, Seunghun and Seo, Jiwan and Im, Sunghoon},
  title     = {{CVA}: Context-aware Video-text Alignment for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17578--17587},
}
PAS: A Training-Free Stabilizer for Temporal Encoding in Video LLMs: Bowen Sun,

Yujun Cai,

Ming-Hsuan Yang,

Hang Wu,

Yiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Bowen and Cai, Yujun and Yang, Ming-Hsuan and Wu, Hang and Wang, Yiwei},
  title     = {{PAS}: A Training-Free Stabilizer for Temporal Encoding in Video {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14471--14480},
}
CanonCGT: Reference-Based Color Grading via Canonical Pivot Representation: Jinwon Ko,

Keunsoo Ko,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Jinwon and Ko, Keunsoo and Kim, Chang-Su},
  title     = {{CanonCGT}: Reference-Based Color Grading via Canonical Pivot Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15486--15495},
}
Every Error has Its Magnitude: Asymmetric Mistake Severity Training for Multiclass Multiple Instance Learning: Sungrae Hong,

Jiwon Jeong,

Jisu Shin,

Donghee Han,

Sol Lee,

Kyungeun Kim,

Mun Yong Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Sungrae and Jeong, Jiwon and Shin, Jisu and Han, Donghee and Lee, Sol and Kim, Kyungeun and Yi, Mun Yong},
  title     = {Every Error has Its Magnitude: Asymmetric Mistake Severity Training for Multiclass Multiple Instance Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28244--28253},
}
Thinking in 360deg: Humanoid Visual Search in the Wild: Heyang Yu,

Yinan Han,

Xiangyu Zhang,

Baiqiao Yin,

Bowen Chang,

Xiangyu Han,

Xinhao Liu,

Jing Zhang,

Marco Pavone,

Chen Feng,

Saining Xie,

Yiming Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Heyang and Han, Yinan and Zhang, Xiangyu and Yin, Baiqiao and Chang, Bowen and Han, Xiangyu and Liu, Xinhao and Zhang, Jing and Pavone, Marco and Feng, Chen and Xie, Saining and Li, Yiming},
  title     = {Thinking in 360deg: Humanoid Visual Search in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22445--22455},
}
Towards Unified Human Perception and Machine Understanding: Token Flow Guided Compression Framework: Li Xu,

Yingfu Zhang,

Kepeng Xu,

Gang He,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Li and Zhang, Yingfu and Xu, Kepeng and He, Gang and Li, Yunsong},
  title     = {Towards Unified Human Perception and Machine Understanding: Token Flow Guided Compression Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17630--17640},
}
Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting: Xinhang Liu,

Pedro Miraldo,

Suhas Lohit,

Huaizu Jiang,

Naoko Sawada,

Yu-Wing Tai,

Chi-Keung Tang,

Moitreya Chatterjee; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinhang and Miraldo, Pedro and Lohit, Suhas and Jiang, Huaizu and Sawada, Naoko and Tai, Yu-Wing and Tang, Chi-Keung and Chatterjee, Moitreya},
  title     = {{Point4Cast}: Streaming Dynamic Scene Reconstruction and Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14600--14611},
}
Generalizable Video Quality Assessment via Weak-to-Strong Learning: Linhan Cao,

Wei Sun,

Xiangyang Zhu,

Kaiwei Zhang,

Jun Jia,

Yicong Peng,

Dandan Zhu,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Linhan and Sun, Wei and Zhu, Xiangyang and Zhang, Kaiwei and Jia, Jun and Peng, Yicong and Zhu, Dandan and Zhai, Guangtao and Min, Xiongkuo},
  title     = {Generalizable Video Quality Assessment via Weak-to-Strong Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25578--25588},
}
VisPlay: Self-Evolving Vision-Language Models: Yicheng He,

Chengsong Huang,

Zongxia Li,

Jiaxin Huang,

Yonghui Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yicheng and Huang, Chengsong and Li, Zongxia and Huang, Jiaxin and Yang, Yonghui},
  title     = {{VisPlay}: Self-Evolving Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26274--26284},
}
DeepScan: A Training-Free Framework for Visually Grounded Reasoning in Large Vision-Language Models: Yangfu Li,

Hongjian Zhan,

Jiawei Chen,

Yuning Gong,

Qi Liu,

Yue Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yangfu and Zhan, Hongjian and Chen, Jiawei and Gong, Yuning and Liu, Qi and Lu, Yue},
  title     = {{DeepScan}: A Training-Free Framework for Visually Grounded Reasoning in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19155--19164},
}
APEX: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation: Daoxuan Zhang,

Ping Chen,

Xiaobo Xia,

Xiu Su,

Ruichen Zhen,

Jianqiang Xiao,

Shuo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Daoxuan and Chen, Ping and Xia, Xiaobo and Su, Xiu and Zhen, Ruichen and Xiao, Jianqiang and Yang, Shuo},
  title     = {{APEX}: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15232--15242},
}
Rationale-Enhanced Decoding for Multi-modal Chain-of-Thought: Shin'ya Yamaguchi,

Kosuke Nishida,

Daiki Chijiwa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamaguchi_2026_CVPR,
  author    = {Yamaguchi, Shin'ya and Nishida, Kosuke and Chijiwa, Daiki},
  title     = {Rationale-Enhanced Decoding for Multi-modal Chain-of-Thought},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19241--19252},
}
D$^2$-FOSA: Dual-Diffusion Guided EEG-to-Image Reconstruction with Frequency-Oriented Semantic Alignment: Chenglong Yu,

Shuai Shen,

Xiangsheng Li,

Yang Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Chenglong and Shen, Shuai and Li, Xiangsheng and Li, Yang},
  title     = {{D$^2$-FOSA}: Dual-Diffusion Guided {EEG-to-Image} Reconstruction with Frequency-Oriented Semantic Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26698--26710},
}
Spherical Voronoi: Directional Appearance as a Differentiable Partition of the Sphere: Francesco Di Sario,

Daniel Rebain,

Dor Verbin,

Marco Grangetto,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Di_Sario_2026_CVPR,
  author    = {Di Sario, Francesco and Rebain, Daniel and Verbin, Dor and Grangetto, Marco and Tagliasacchi, Andrea},
  title     = {Spherical {Voronoi}: Directional Appearance as a Differentiable Partition of the Sphere},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22529--22538},
}
Test-time Ego-Exo-centric Adaptation for Action Anticipation via Multi-Label Prototype Growing and Dual-Clue Consistency: Zhaofeng Shi,

Heqian Qiu,

Lanxiao Wang,

Qingbo Wu,

Fanman Meng,

Lili Pan,

Hongliang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhaofeng and Qiu, Heqian and Wang, Lanxiao and Wu, Qingbo and Meng, Fanman and Pan, Lili and Li, Hongliang},
  title     = {Test-time Ego-Exo-centric Adaptation for Action Anticipation via Multi-Label Prototype Growing and Dual-Clue Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16988--16999},
}
DreamStereo: Towards Real-Time Stereo Inpainting for HD Videos: Yuan Huang,

Sijie Zhao,

Jing Cheng,

Hao Xu,

Shaohui Jiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuan and Zhao, Sijie and Cheng, Jing and Xu, Hao and Jiao, Shaohui},
  title     = {{DreamStereo}: Towards Real-Time Stereo Inpainting for {HD} Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25393--25402},
}
Boosting Reasoning in Large Multimodal Models via Activation Replay: Yun Xing,

Xiaobin Hu,

Qingdong He,

Jiangning Zhang,

Shuicheng Yan,

Shijian Lu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Yun and Hu, Xiaobin and He, Qingdong and Zhang, Jiangning and Yan, Shuicheng and Lu, Shijian and Jiang, Yu-Gang},
  title     = {Boosting Reasoning in Large Multimodal Models via Activation Replay},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19229--19240},
}
Enhancing Part-Level Point Grounding for Any Open-Source MLLMs: Jin-Cheng Jhang,

Fu-En Wang,

Xin Yang,

Nan Qiao,

Lu Xia,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Jhang_2026_CVPR,
  author    = {Jhang, Jin-Cheng and Wang, Fu-En and Yang, Xin and Qiao, Nan and Xia, Lu and Sun, Min and Kuo, Cheng-Hao},
  title     = {Enhancing Part-Level Point Grounding for Any Open-Source {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22900--22909},
}
RoadSceneBench: A Lightweight Benchmark for Mid-Level Road Scene Understanding: Xiyan Liu,

Han Wang,

Yuhu Wang,

Junjie Cai,

Zhe Cao,

Jianzhong Yang,

Zhen Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiyan and Wang, Han and Wang, Yuhu and Cai, Junjie and Cao, Zhe and Yang, Jianzhong and Lu, Zhen},
  title     = {{RoadSceneBench}: A Lightweight Benchmark for Mid-Level Road Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23720--23729},
}
From Inpainting to Layer Decomposition: Repurposing Generative Inpainting Models for Image Layer Decomposition: Jingxi Chen,

Yixiao Zhang,

Xiaoye Qian,

Zongxia Li,

Cornelia Fermuller,

Caren Chen,

Yiannis Aloimonos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingxi and Zhang, Yixiao and Qian, Xiaoye and Li, Zongxia and Fermuller, Cornelia and Chen, Caren and Aloimonos, Yiannis},
  title     = {From Inpainting to Layer Decomposition: Repurposing Generative Inpainting Models for Image Layer Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16054--16063},
}
AeroDGS: Physically Consistent Dynamic Gaussian Splatting for Single-Sequence Aerial 4D Reconstruction: Hanyang Liu,

Rongjun Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hanyang and Qin, Rongjun},
  title     = {{AeroDGS}: Physically Consistent Dynamic {Gaussian} Splatting for Single-Sequence Aerial {4D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19011--19021},
}
SMVRT: Implicit Human 3D Modeling Using Sparse Multi-View Volumetric Reconstruction with Transformer Fusion: Chuanmao Fan,

Chenxi Zhao,

Ye Duan; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Chuanmao and Zhao, Chenxi and Duan, Ye},
  title     = {{SMVRT}: Implicit Human {3D} Modeling Using Sparse Multi-View Volumetric Reconstruction with Transformer Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14556--14566},
}
HCL-FF: Hierarchical and Contrastive Learning for Forward-Forward Algorithm: Jie-En Yao,

Hong-En Chen,

C.-C. Jay Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Jie-En and Chen, Hong-En and Kuo, C.-C. Jay},
  title     = {{HCL-FF}: Hierarchical and Contrastive Learning for Forward-Forward Algorithm},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27429--27438},
}
Resolving the Stability-Plasticity Dilemma in Reinforcement Learning via Complementary Continual Critics: Bo Sun,

Peixi Peng,

Guang Tan,

Haoran Xu,

Yaokun Li,

Yiqian Chang,

Shuaixian Wang,

Luntong Li; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Bo and Peng, Peixi and Tan, Guang and Xu, Haoran and Li, Yaokun and Chang, Yiqian and Wang, Shuaixian and Li, Luntong},
  title     = {Resolving the Stability-Plasticity Dilemma in Reinforcement Learning via Complementary Continual Critics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22348--22357},
}
PACT: Phase-Like Transition Constraints in Adapter-Based Continual Learning of Vision-Language Models: Xuan Wang,

Guiguang Ding,

Jungong Han; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuan and Ding, Guiguang and Han, Jungong},
  title     = {{PACT}: Phase-Like Transition Constraints in Adapter-Based Continual Learning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17999--18009},
}
AnchorFlow: Training-Free 3D Editing via Latent Anchor-Aligned Flows: Zhenglin Zhou,

Fan Ma,

Chengzhuo Gui,

Xiaobo Xia,

Hehe Fan,

Yi Yang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Zhenglin and Ma, Fan and Gui, Chengzhuo and Xia, Xiaobo and Fan, Hehe and Yang, Yi and Chua, Tat-Seng},
    title     = {{AnchorFlow}: Training-Free {3D} Editing via Latent Anchor-Aligned Flows},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14387--14397}
}
Recurrent Video Masked Autoencoders: Daniel Zoran,

Nikhil Parthasarathy,

Yi Yang,

Drew A. Hudson,

João Carreira,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zoran_2026_CVPR,
    author    = {Zoran, Daniel and Parthasarathy, Nikhil and Yang, Yi and Hudson, Drew A. and Carreira, Jo{\~a}o and Zisserman, Andrew},
    title     = {Recurrent Video Masked Autoencoders},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17744--17755}
}
GardenDesigner: Encoding Aesthetic Principles into Jiangnan Garden Construction via a Chain of Agents: Mengtian Li,

Fan Yang,

Ruixue Xiong,

Yiyan Fan,

Zhifeng Xie,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Mengtian and Yang, Fan and Xiong, Ruixue and Fan, Yiyan and Xie, Zhifeng and Wang, Zeyu},
    title     = {{GardenDesigner}: Encoding Aesthetic Principles into {Jiangnan} Garden Construction via a Chain of Agents},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24417--24427}
}
Beyond Caption-Based Queries in Video Moment Retrieval: David Pujol-Perich,

Albert Clapés,

Dima Damen,

Sergio Escalera,

Michael Wray; [pdf] [supp]
[bibtex]
@InProceedings{Pujol-Perich_2026_CVPR,
    author    = {Pujol-Perich, David and Clap{\'e}s, Albert and Damen, Dima and Escalera, Sergio and Wray, Michael},
    title     = {Beyond Caption-Based Queries in Video Moment Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18545--18554}
}
TEXTRIX: Latent Attribute Grid for Native Texture Generation and Beyond: Yifei Zeng,

Yajie Bao,

Jiachen Qian,

Shuang Wu,

Youtian Lin,

Hao Zhu,

Buyu Li,

Feihu Zhang,

Xun Cao,

Yao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Yifei and Bao, Yajie and Qian, Jiachen and Wu, Shuang and Lin, Youtian and Zhu, Hao and Li, Buyu and Zhang, Feihu and Cao, Xun and Yao, Yao},
    title     = {{TEXTRIX}: Latent Attribute Grid for Native Texture Generation and Beyond},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27104--27113}
}
CHEEM: Continual Learning by Reuse, New, Adapt and Skip - A Hierarchical Exploration-Exploitation Approach: Chinmay Savadikar,

Michelle Dai,

Tianfu Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Savadikar_2026_CVPR,
    author    = {Savadikar, Chinmay and Dai, Michelle and Wu, Tianfu},
    title     = {{CHEEM}: Continual Learning by Reuse, New, Adapt and Skip - A Hierarchical Exploration-Exploitation Approach},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25066--25076}
}
Mitigating Objectness Bias and Region-to-Text Misalignment for Open-Vocabulary Panoptic Segmentation: Nikolay Kormushev,

Josip Šarić,

Matej Kristan; [pdf] [supp]
[bibtex]
@InProceedings{Kormushev_2026_CVPR,
    author    = {Kormushev, Nikolay and {\v{S}}ari{\'c}, Josip and Kristan, Matej},
    title     = {Mitigating Objectness Bias and Region-to-Text Misalignment for Open-Vocabulary Panoptic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17905--17915}
}
Relational Visual Similarity: Thao Nguyen,

Sicheng Mo,

Krishna Kumar Singh,

Yilin Wang,

Jing Shi,

Nicholas Kolkin,

Eli Shechtman,

Yong Jae Lee,

Yuheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Thao and Mo, Sicheng and Singh, Krishna Kumar and Wang, Yilin and Shi, Jing and Kolkin, Nicholas and Shechtman, Eli and Lee, Yong Jae and Li, Yuheng},
    title     = {Relational Visual Similarity},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24141--24150}
}
EReCu: Pseudo-label Evolution Fusion and Refinement with Multi-Cue Learning for Unsupervised Camouflage Detection: Shuo Jiang,

Gaojia Zhang,

Min Tan,

Yufei Yin,

Gang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Shuo and Zhang, Gaojia and Tan, Min and Yin, Yufei and Pan, Gang},
    title     = {{EReCu}: Pseudo-label Evolution Fusion and Refinement with Multi-Cue Learning for Unsupervised Camouflage Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25547--25556}
}
Learning and Aligning Click-Aware Shape Prior for Interactive Amodal Instance Segmentation: Junjie Chen,

Junwei Lin,

Ren Hong,

Shengjie Liu,

Yuming Fang,

Feng Qian,

Yifan Zuo; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Junjie and Lin, Junwei and Hong, Ren and Liu, Shengjie and Fang, Yuming and Qian, Feng and Zuo, Yifan},
    title     = {Learning and Aligning Click-Aware Shape Prior for Interactive Amodal Instance Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20478--20487}
}
A Multi-Agent Perception-Action Alliance for Efficient Long Video Reasoning: Yichang Xu,

Gaowen Liu,

Ramana Rao Kompella,

Tiansheng Huang,

Sihao Hu,

Fatih Ilhan,

Selim Furkan Tekin,

Zachary Yahn,

Ling Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Yichang and Liu, Gaowen and Kompella, Ramana Rao and Huang, Tiansheng and Hu, Sihao and Ilhan, Fatih and Tekin, Selim Furkan and Yahn, Zachary and Liu, Ling},
    title     = {A Multi-Agent Perception-Action Alliance for Efficient Long Video Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19497--19507}
}
OntoAug: Rethinking Generative Data Augmentation via Ontology Guidance: Shuo Wang,

Zhichuan Wang,

Jun Luo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Shuo and Wang, Zhichuan and Luo, Jun},
    title     = {{OntoAug}: Rethinking Generative Data Augmentation via Ontology Guidance},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22519--22528}
}
EnergyAction: Unimanual to Bimanual Composition with Energy-Based Models: Mingchen Song,

Xiang Deng,

Jie Wei,

Dongmei Jiang,

Liqiang Nie,

Weili Guan; [pdf] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Mingchen and Deng, Xiang and Wei, Jie and Jiang, Dongmei and Nie, Liqiang and Guan, Weili},
    title     = {{EnergyAction}: Unimanual to Bimanual Composition with Energy-Based Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20845--20855}
}
MDS-VQA: Model-Informed Data Selection for Video Quality Assessment: Jian Zou,

Xiaoyu Xu,

Zhihua Wang,

Yilin Wang,

Balu Adsumilli,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
    author    = {Zou, Jian and Xu, Xiaoyu and Wang, Zhihua and Wang, Yilin and Adsumilli, Balu and Ma, Kede},
    title     = {{MDS-VQA}: Model-Informed Data Selection for Video Quality Assessment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22713--22722}
}
SEBA: Sample-Efficient Black-Box Attacks on Visual Reinforcement Learning: Tairan Huang,

Yulin Jin,

Junxu Liu,

Qingqing Ye,

Haibo Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Tairan and Jin, Yulin and Liu, Junxu and Ye, Qingqing and Hu, Haibo},
    title     = {{SEBA}: Sample-Efficient Black-Box Attacks on Visual Reinforcement Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27861--27871}
}
Multimodal Semantic Bias Mitigation for Diverse Text-To-3D Generation: Yukuan Min,

Muli Yang,

Jinhao Zhang,

Yuxuan Wang,

Yihang Zhu,

Jiexi Yan,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Min_2026_CVPR,
    author    = {Min, Yukuan and Yang, Muli and Zhang, Jinhao and Wang, Yuxuan and Zhu, Yihang and Yan, Jiexi and Deng, Cheng},
    title     = {Multimodal Semantic Bias Mitigation for Diverse Text-To-{3D} Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14779--14788}
}
Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation: Xinhao Cai,

Gensheng Pei,

Zeren Sun,

Yazhou Yao,

Fumin Shen,

Wenguan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Xinhao and Pei, Gensheng and Sun, Zeren and Yao, Yazhou and Shen, Fumin and Wang, Wenguan},
    title     = {Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {26909--26919}
}
RewardFlow: Generate Images by Optimizing What You Reward: Onkar Susladkar,

Dong-Hwan Jang,

Tushar Prakash,

Adheesh Juvekar,

Vedant Shah,

Ayush Barik,

Nabeel Bashir,

Muntasir Wahed,

Ritish Shrirao,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Susladkar_2026_CVPR,
    author    = {Susladkar, Onkar and Jang, Dong-Hwan and Prakash, Tushar and Juvekar, Adheesh and Shah, Vedant and Barik, Ayush and Bashir, Nabeel and Wahed, Muntasir and Shrirao, Ritish and Lourentzou, Ismini},
    title     = {{RewardFlow}: Generate Images by Optimizing What You Reward},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20066--20076}
}
PAS: Prelim Attention Score for Detecting Object Hallucinations in Large Vision-Language Models: Nhat Hoang,

Minh Vu,

My T. Thai,

Manish Bhattarai; [pdf] [supp]
[bibtex]
@InProceedings{Hoang_2026_CVPR,
    author    = {Hoang, Nhat and Vu, Minh and Thai, My T. and Bhattarai, Manish},
    title     = {{PAS}: Prelim Attention Score for Detecting Object Hallucinations in Large Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18273--18283}
}
HOPS: Hierarchical Open-vocabulary Part Segmentation with Attention-Aware Filtering and Affinity-Guided Enhancement: Xinlong Li,

Di Lin,

Shaoyiyi Gao,

Yaxuan Liu,

Jixian He,

Jiaxin Li,

Ruonan Liu,

Qing Guo,

Kairui Yang,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xinlong and Lin, Di and Gao, Shaoyiyi and Liu, Yaxuan and He, Jixian and Li, Jiaxin and Liu, Ruonan and Guo, Qing and Yang, Kairui and Feng, Wei},
    title     = {{HOPS}: Hierarchical Open-vocabulary Part Segmentation with Attention-Aware Filtering and Affinity-Guided Enhancement},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27719--27729}
}
Captain Safari: A World Engine with Pose-Aligned 3D Memory: Yu-Cheng Chou,

Xingrui Wang,

Yitong Li,

Jiahao Wang,

Hanting Liu,

Cihang Xie,

Alan Yuille,

Junfei Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chou_2026_CVPR,
    author    = {Chou, Yu-Cheng and Wang, Xingrui and Li, Yitong and Wang, Jiahao and Liu, Hanting and Xie, Cihang and Yuille, Alan and Xiao, Junfei},
    title     = {Captain {Safari}: A World Engine with Pose-Aligned {3D} Memory},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25347--25357}
}
Streaming Video Crime Anticipation with Spatio-Temporal Causal Reasoning: Yusong Wang,

Zheyuan Gu,

Keyu Mao,

Minghao Shao,

Mingkun Xu,

Prayag Tiwari,

Jiawei Shao,

Qingsong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yusong and Gu, Zheyuan and Mao, Keyu and Shao, Minghao and Xu, Mingkun and Tiwari, Prayag and Shao, Jiawei and Zhao, Qingsong},
    title     = {Streaming Video Crime Anticipation with Spatio-Temporal Causal Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16933--16943}
}
TopoMA: Topology-Guided Multi-Agent Dense RGB 3D Reconstruction via Distributed Inference: Xuanxuan Zhang,

ShuHui Shi,

Tianxiang Zhang,

Zhetao Guo,

Huang Zixuan,

You Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xuanxuan and Shi, ShuHui and Zhang, Tianxiang and Guo, Zhetao and Zixuan, Huang and Li, You},
    title     = {{TopoMA}: Topology-Guided Multi-Agent Dense {RGB} {3D} Reconstruction via Distributed Inference},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21784--21793}
}
Rejection Mixing: Fast Semantic Propagation of Mask Tokens for Efficient DLLM Inference: Yushi Ye,

Feng Hong,

Huangjie Zheng,

Xu Chen,

Zhiyong Chen,

Yanfeng Wang,

Jiangchao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Yushi and Hong, Feng and Zheng, Huangjie and Chen, Xu and Chen, Zhiyong and Wang, Yanfeng and Yao, Jiangchao},
    title     = {Rejection Mixing: Fast Semantic Propagation of Mask Tokens for Efficient {DLLM} Inference},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17619--17629}
}
CCF: Complementary Collaborative Fusion for Domain Generalized Multi-Modal 3D Object Detection: Yuchen Wu,

Kun Wang,

Yining Pan,

Na Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Yuchen and Wang, Kun and Pan, Yining and Zhao, Na},
    title     = {{CCF}: Complementary Collaborative Fusion for Domain Generalized Multi-Modal {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18745--18754}
}
PGR-Net: Prior-Guided ROI Reasoning Network for Brain Tumor MRI Segmentation: Jiacheng Lu,

Hui Ding,

Shiyu Zhang,

Guoping Huo; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping},
    title     = {{PGR-Net}: Prior-Guided {ROI} Reasoning Network for Brain Tumor {MRI} Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22816--22825}
}
GenMask: Adapting DiT for Segmentation via Direct Mask Generation: Yuhuan Yang,

Xianwei Zhuang,

Yuxuan Cai,

Chaofan Ma,

Shuai Bai,

Jiangchao Yao,

Ya Zhang,

Junyang Lin,

Yanfeng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuhuan and Zhuang, Xianwei and Cai, Yuxuan and Ma, Chaofan and Bai, Shuai and Yao, Jiangchao and Zhang, Ya and Lin, Junyang and Wang, Yanfeng},
    title     = {{GenMask}: Adapting {DiT} for Segmentation via Direct Mask Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20455--20467}
}
FedCART: Tackling Long-Tailed Distributions in Federated Adversarial Training via Classifier Refinement: Yuchen Qin,

Yizhi Zhou,

Junxiao Wang,

Xin Xie,

Heng Qi; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Yuchen and Zhou, Yizhi and Wang, Junxiao and Xie, Xin and Qi, Heng},
    title     = {{FedCART}: Tackling Long-Tailed Distributions in Federated Adversarial Training via Classifier Refinement},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24557--24566}
}
Reading Your Actions: Learning Generalizable Action Representations via Pre-training AEMG: Zhenghao Huang,

Huilin Yao,

Kaikai Wang,

Lin Shu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Zhenghao and Yao, Huilin and Wang, Kaikai and Shu, Lin},
    title     = {Reading Your Actions: Learning Generalizable Action Representations via Pre-training {AEMG}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20108--20117}
}
Frequency-Aware Affinity for Weakly Supervised Semantic Segmentation: Ziqian Yang,

Xianglin Qiu,

Xinqiao Zhao,

Xiaolei Wang,

Quan Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Ziqian and Qiu, Xianglin and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin},
    title     = {Frequency-Aware Affinity for Weakly Supervised Semantic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20468--20477}
}
OmniVTG: A Large-Scale Dataset and Training Paradigm for Open-World Video Temporal Grounding: Minghang Zheng,

Zihao Yin,

Yi Yang,

Yuxin Peng,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Minghang and Yin, Zihao and Yang, Yi and Peng, Yuxin and Liu, Yang},
    title     = {{OmniVTG}: A Large-Scale Dataset and Training Paradigm for Open-World Video Temporal Grounding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24620--24629}
}
Cut to the Chase: Training-free Multimodal Summarization via Chain-of-Events: Xiaoxing You,

Qiang Huang,

Lingyu Li,

Xiaojun Chang,

Jun Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
    author    = {You, Xiaoxing and Huang, Qiang and Li, Lingyu and Chang, Xiaojun and Yu, Jun},
    title     = {Cut to the Chase: Training-free Multimodal Summarization via Chain-of-Events},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {26219--26229}
}
Saliency-Guided Representation with Consistency Policy Learning for Visual Unsupervised Reinforcement Learning: Jingbo Sun,

Qichao Zhang,

Songjun Tu,

Xing Fang,

Yupeng Zheng,

Haoran Li,

Ke Chen,

Dongbin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Jingbo and Zhang, Qichao and Tu, Songjun and Fang, Xing and Zheng, Yupeng and Li, Haoran and Chen, Ke and Zhao, Dongbin},
    title     = {Saliency-Guided Representation with Consistency Policy Learning for Visual Unsupervised Reinforcement Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19508--19517}
}
I-Scene: 3D Instance Models are Implicit Generalizable Spatial Learners: Lu Ling,

Yunhao Ge,

Yichen Sheng,

Aniket Bera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2026_CVPR,
    author    = {Ling, Lu and Ge, Yunhao and Sheng, Yichen and Bera, Aniket},
    title     = {{I-Scene}: {3D} Instance Models are Implicit Generalizable Spatial Learners},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {26974--26983}
}
Cleaning the Pool: Progressive Filtering of Unlabeled Pools in Deep Active Learning: Denis Huseljic,

Marek Herde,

Lukas Rauch,

Paul Hahn,

Bernhard Sick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huseljic_2026_CVPR,
    author    = {Huseljic, Denis and Herde, Marek and Rauch, Lukas and Hahn, Paul and Sick, Bernhard},
    title     = {Cleaning the Pool: Progressive Filtering of Unlabeled Pools in Deep Active Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22238--22247}
}
Seeing Clearly, Reasoning Confidently: Plug-and-Play Remedies for Vision Language Model Blindness: Xin Hu,

Haomiao Ni,

Yunbei Zhang,

Jihun Hamm,

Zechen Li,

Zhengming Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Xin and Ni, Haomiao and Zhang, Yunbei and Hamm, Jihun and Li, Zechen and Ding, Zhengming},
    title     = {Seeing Clearly, Reasoning Confidently: Plug-and-Play Remedies for Vision Language Model Blindness},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18806--18815}
}
Dataset Distillation by Influence Matching: Haoru Tan,

Wang Wang,

Sitong Wu,

Xiuzhe Wu,

Yang-Tian Sun,

Chirui Chang,

Shaofeng Zhang,

Xiaojuan Qi; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Haoru and Wang, Wang and Wu, Sitong and Wu, Xiuzhe and Sun, Yang-Tian and Chang, Chirui and Zhang, Shaofeng and Qi, Xiaojuan},
    title     = {Dataset Distillation by Influence Matching},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19654--19664}
}
Efficient Video Object Segmentation and Tracking with Recurrent Dynamic Submodel: Weidong Tang,

Zhiyuan Liang,

Xinyan Wan,

Chen Zhu,

Zhaopan Xu,

Pengfei Zhou,

Yan Song,

Yang You,

Wangbo Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Weidong and Liang, Zhiyuan and Wan, Xinyan and Zhu, Chen and Xu, Zhaopan and Zhou, Pengfei and Song, Yan and You, Yang and Zhao, Wangbo},
    title     = {Efficient Video Object Segmentation and Tracking with Recurrent Dynamic Submodel},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20912--20921}
}
Zero-Shot Depth Completion with Vision-Language Model: Zhiqiang Yan,

Yuan Wu,

Gim Hee Lee; [pdf]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Zhiqiang and Wu, Yuan and Lee, Gim Hee},
    title     = {Zero-Shot Depth Completion with Vision-Language Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19833--19843}
}
SAVA-X: Ego-to-Exo Imitation Error Detection via Scene-Adaptive View Alignment and Bidirectional Cross View Fusion: Xiang Li,

Heqian Qiu,

Lanxiao Wang,

Benliu Qiu,

Fanman Meng,

Linfeng Xu,

Hongliang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xiang and Qiu, Heqian and Wang, Lanxiao and Qiu, Benliu and Meng, Fanman and Xu, Linfeng and Li, Hongliang},
    title     = {{SAVA-X}: Ego-to-Exo Imitation Error Detection via Scene-Adaptive View Alignment and Bidirectional Cross View Fusion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {28062--28073}
}
Rosetta Stone For Unified MLLMs: A Unified Tokenizer to Decipher Understanding and Generation: Wenyu Sun,

Hufei Li,

Ruijin Jin,

Xiangheng Kong,

Yuning Jiang; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Wenyu and Li, Hufei and Jin, Ruijin and Kong, Xiangheng and Jiang, Yuning},
    title     = {{Rosetta Stone} For Unified {MLLMs}: A Unified Tokenizer to Decipher Understanding and Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22964--22974}
}
Anomaly as Non-Conformity via Training-Free Graph Laplacian Energy Minimization: Jungwook Seo,

Minjeong Kim,

Younkwan Lee,

Seungho Shin,

Sungyong Baik; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2026_CVPR,
    author    = {Seo, Jungwook and Kim, Minjeong and Lee, Younkwan and Shin, Seungho and Baik, Sungyong},
    title     = {Anomaly as Non-Conformity via Training-Free Graph {Laplacian} Energy Minimization},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21336--21345}
}
TTL: Test-time Textual Learning for OOD Detection with Pretrained Vision-Language Models: Jinlun Ye,

Jiang Liao,

Runhe Lai,

Xinhua Lu,

Jiaxin Zhuang,

Zhiyong Gan,

Ruixuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Jinlun and Liao, Jiang and Lai, Runhe and Lu, Xinhua and Zhuang, Jiaxin and Gan, Zhiyong and Wang, Ruixuan},
    title     = {{TTL}: Test-time Textual Learning for {OOD} Detection with Pretrained Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27590--27599}
}
FreqSIC: Frequency-aware Stereo Image Compression with Bi-directional Checkerboard Context Model: Shiyu Qin,

Yongkang Lu,

Yimin Zhou,

Jiawei Li,

Yifan Ren,

Yuerong Xue,

Shu-Tao Xia,

Bin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Shiyu and Lu, Yongkang and Zhou, Yimin and Li, Jiawei and Ren, Yifan and Xue, Yuerong and Xia, Shu-Tao and Chen, Bin},
    title     = {{FreqSIC}: Frequency-aware Stereo Image Compression with Bi-directional Checkerboard Context Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19393--19402}
}
Less is More: Data-Efficient Adaptation for Controllable Text-to-Video Generation: Shihan Cheng,

Nilesh Kulkarni,

David Hyde,

Dmitriy Smirnov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Shihan and Kulkarni, Nilesh and Hyde, David and Smirnov, Dmitriy},
    title     = {Less is More: Data-Efficient Adaptation for Controllable Text-to-Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14811--14821}
}
Socratic-Geo: Synthetic Data Generation and Cross-Modal Geometric Reasoning via Multi-Agent Interaction: Zhengbo Jiao,

Zifan Zhang,

Shaobo Wang,

Wei Wang,

Bing Zhao,

Hu Wei,

Linfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
    author    = {Jiao, Zhengbo and Zhang, Zifan and Wang, Shaobo and Wang, Wei and Zhao, Bing and Wei, Hu and Zhang, Linfeng},
    title     = {{Socratic-Geo}: Synthetic Data Generation and Cross-Modal Geometric Reasoning via Multi-Agent Interaction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23795--23804}
}
SATTC: Structure-Aware Label-Free Test-Time Calibration for Cross-Subject EEG-to-Image Retrieval: Qunjie Huang,

Weina Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Qunjie and Zhu, Weina},
    title     = {{SATTC}: Structure-Aware Label-Free Test-Time Calibration for Cross-Subject {EEG}-to-Image Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16887--16896}
}
Fine-Grained Multi Image Object Hallucination Benchmark: Joonki Min,

Chaeyun Kim,

Hyungwook Choi,

Yejin Kim,

Kihyun Kim,

Yohan Jo,

Joonseok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Min_2026_CVPR,
    author    = {Min, Joonki and Kim, Chaeyun and Choi, Hyungwook and Kim, Yejin and Kim, Kihyun and Jo, Yohan and Lee, Joonseok},
    title     = {Fine-Grained Multi Image Object Hallucination Benchmark},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18295--18305}
}
MixerCSeg: An Efficient Mixer Architecture for Crack Segmentation via Decoupled Mamba Attention: Zilong Zhao,

Zhengming Ding,

Pei Niu,

Wenhao Sun,

Feng Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Zilong and Ding, Zhengming and Niu, Pei and Sun, Wenhao and Guo, Feng},
    title     = {{MixerCSeg}: An Efficient Mixer Architecture for Crack Segmentation via Decoupled {Mamba} Attention},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17969--17978}
}
Co-Me: Confidence Guided Token Merging for Visual Geometric Transformers: Yutian Chen,

Yuheng Qiu,

Ruogu Li,

Jay Patrikar,

Sebastian Scherer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yutian and Qiu, Yuheng and Li, Ruogu and Patrikar, Jay and Scherer, Sebastian},
    title     = {{Co-Me}: Confidence Guided Token Merging for Visual Geometric Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14590--14599}
}
EagleNet: Energy-Aware Fine-Grained Relationship Learning Network for Text-Video Retrieval: Yuhan Chen,

Pengwen Dai,

Chuan Wang,

Dayan Wu,

Xiaochun Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yuhan and Dai, Pengwen and Wang, Chuan and Wu, Dayan and Cao, Xiaochun},
    title     = {{EagleNet}: Energy-Aware Fine-Grained Relationship Learning Network for Text-Video Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23901--23911}
}
Local Precise Refinement: A Dual-Gated Mixture-of-Experts for Enhancing Foundation Model Generalization against Spectral Shifts: Xi Chen,

Maojun Zhang,

Yu Liu,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xi and Zhang, Maojun and Liu, Yu and Yan, Shen},
    title     = {Local Precise Refinement: A Dual-Gated Mixture-of-Experts for Enhancing Foundation Model Generalization against Spectral Shifts},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20521--20531}
}
Stereo World Model: Camera-Guided Stereo Video Generation: Yang-Tian Sun,

Zehuan Huang,

Yifan Niu,

Lin Ma,

Yan-Pei Cao,

Yuewen Ma,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Yang-Tian and Huang, Zehuan and Niu, Yifan and Ma, Lin and Cao, Yan-Pei and Ma, Yuewen and Qi, Xiaojuan},
    title     = {Stereo World Model: Camera-Guided Stereo Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18342--18353}
}
Accelerating Streaming Video Large Language Models via Hierarchical Token Compression: Yiyu Wang,

Xuyang Liu,

Xiyan Gui,

Xinying Lin,

Boxue Yang,

Chenfei Liao,

Tailai Chen,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yiyu and Liu, Xuyang and Gui, Xiyan and Lin, Xinying and Yang, Boxue and Liao, Chenfei and Chen, Tailai and Zhang, Linfeng},
    title     = {Accelerating Streaming Video Large Language Models via Hierarchical Token Compression},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18523--18533}
}
MS^2Gait: A Multi-Scale Spatio-Temporal Fusion Network for LiDAR-based Gait Recognition: Shenyin Xu,

Yishan Wang,

Xinyu Li,

Rui Liu,

Zhongyuan Wang,

Xin Tian; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Shenyin and Wang, Yishan and Li, Xinyu and Liu, Rui and Wang, Zhongyuan and Tian, Xin},
    title     = {{MS$^{2}$Gait}: A Multi-Scale Spatio-Temporal Fusion Network for {LiDAR}-based Gait Recognition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17184--17193}
}
Gen3R: 3D Scene Generation Meets Feed-Forward Reconstruction: Jiaxin Huang,

Yuanbo Yang,

Bangbang Yang,

Lin Ma,

Yuewen Ma,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiaxin and Yang, Yuanbo and Yang, Bangbang and Ma, Lin and Ma, Yuewen and Liao, Yiyi},
  title     = {{Gen3R}: {3D} Scene Generation Meets Feed-Forward Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25358--25369},
}
REALM: An MLLM-Agent Framework for Open World 3D Reasoning Segmentation and Editing on Gaussian Splatting: Changyue Shi,

Minghao Chen,

Yiping Mao,

Chuxiao Yang,

Xinyuan Hu,

Jiajun Ding,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Changyue and Chen, Minghao and Mao, Yiping and Yang, Chuxiao and Hu, Xinyuan and Ding, Jiajun and Yu, Zhou},
  title     = {{REALM}: An {MLLM}-Agent Framework for Open World {3D} Reasoning Segmentation and Editing on {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16779--16788},
}
Unlocking Positive Transfer in Incrementally Learning Surgical Instruments: A Self-reflection Hierarchical Prompt Framework: Yu Zhu,

Kang Li,

Zheng Li,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yu and Li, Kang and Li, Zheng and Heng, Pheng-Ann},
  title     = {Unlocking Positive Transfer in Incrementally Learning Surgical Instruments: A Self-reflection Hierarchical Prompt Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21006--21015},
}
Restore, Assess, Repeat: A Unified Framework for Iterative Image Restoration: I-Hsiang Chen,

Isma Hadji,

Enrique Sanchez,

Adrian Bulat,

Sy-Yen Kuo,

Radu Timofte,

Georgios Tzimiropoulos,

Brais Martinez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, I-Hsiang and Hadji, Isma and Sanchez, Enrique and Bulat, Adrian and Kuo, Sy-Yen and Timofte, Radu and Tzimiropoulos, Georgios and Martinez, Brais},
  title     = {Restore, Assess, Repeat: A Unified Framework for Iterative Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15506--15515},
}
Learning to Identify Out-of-Distribution Objects for 3D LiDAR Anomaly Segmentation: Simone Mosco,

Daniel Fusaro,

Alberto Pretto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mosco_2026_CVPR,
  author    = {Mosco, Simone and Fusaro, Daniel and Pretto, Alberto},
  title     = {Learning to Identify Out-of-Distribution Objects for {3D} {LiDAR} Anomaly Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17204--17214},
}
Object-Generalized Re-Identification: A Step Towards Universal Instance Perception: Shuoyi Chen,

Yurui Wu,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shuoyi and Wu, Yurui and Ye, Mang},
  title     = {Object-Generalized Re-Identification: A Step Towards Universal Instance Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18481--18491},
}
Towards Real-World Document Parsing via Realistic Scene Synthesis and Document-Aware Training: Gengluo Li,

Pengyuan Lyu,

Chengquan Zhang,

Huawen Shen,

Liang Wu,

Xingyu Wan,

Gangyan Zeng,

Han Hu,

Can Ma,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Gengluo and Lyu, Pengyuan and Zhang, Chengquan and Shen, Huawen and Wu, Liang and Wan, Xingyu and Zeng, Gangyan and Hu, Han and Ma, Can and Zhou, Yu},
  title     = {Towards Real-World Document Parsing via Realistic Scene Synthesis and Document-Aware Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23709--23719},
}
Wavelet-based Frame Selection by Detecting Semantic Boundary for Long Video Understanding: Wang Chen,

Yuhui Zeng,

Yongdong Luo,

Tianyu Xie,

Luojun Lin,

Jiayi Ji,

Yan Zhang,

Xiawu Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wang and Zeng, Yuhui and Luo, Yongdong and Xie, Tianyu and Lin, Luojun and Ji, Jiayi and Zhang, Yan and Zheng, Xiawu},
  title     = {Wavelet-based Frame Selection by Detecting Semantic Boundary for Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24052--24061},
}
Bridging Pixels and Words: Mask-Aware Local Semantic Fusion for Multimodal Media Verification: Zizhao Chen,

Ping Wei,

Ziyang Ren,

Huan Li,

Xiangru Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zizhao and Wei, Ping and Ren, Ziyang and Li, Huan and Yin, Xiangru},
  title     = {Bridging Pixels and Words: Mask-Aware Local Semantic Fusion for Multimodal Media Verification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26561--26571},
}
CodeV: Code with Images for Faithful Visual Reasoning via Tool-Aware Policy Optimization: Xinhai Hou,

Shaoyuan Xu,

Manan Biyani,

Moyan Li,

Jia Liu,

Todd C Hollon,

Bryan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xinhai and Xu, Shaoyuan and Biyani, Manan and Li, Moyan and Liu, Jia and Hollon, Todd C and Wang, Bryan},
  title     = {{CodeV}: Code with Images for Faithful Visual Reasoning via Tool-Aware Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21500--21510},
}
Mario: Multimodal Graph Reasoning with Large Language Models: Yuanfu Sun,

Kang Li,

Pengkang Guo,

Jiajin Liu,

Qiaoyu Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yuanfu and Li, Kang and Guo, Pengkang and Liu, Jiajin and Tan, Qiaoyu},
  title     = {Mario: Multimodal Graph Reasoning with Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19219--19228},
}
L3DR: 3D-aware LiDAR Diffusion and Rectification: Quan Liu,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Quan and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{L3DR}: {3D}-aware {LiDAR} Diffusion and Rectification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17153--17163},
}
Semantic Derivative Flow: Graph-Guided Diffusion for Controllable Instance Interactions: Shibin Mei,

Hang Wang,

Bingbing Ni; [pdf] [supp]
[bibtex]
@InProceedings{Mei_2026_CVPR,
  author    = {Mei, Shibin and Wang, Hang and Ni, Bingbing},
  title     = {Semantic Derivative Flow: Graph-Guided Diffusion for Controllable Instance Interactions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14822--14831},
}
MindDriver: Introducing Progressive Multimodal Reasoning for Autonomous Driving: Lingjun Zhang,

Yujian Yuan,

Changjie Wu,

Xinyuan Chang,

Xin Cai,

Shuang Zeng,

Linzhe Shi,

Sijin Wang,

Hang Zhang,

Mu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Lingjun and Yuan, Yujian and Wu, Changjie and Chang, Xinyuan and Cai, Xin and Zeng, Shuang and Shi, Linzhe and Wang, Sijin and Zhang, Hang and Xu, Mu},
  title     = {{MindDriver}: Introducing Progressive Multimodal Reasoning for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17831--17841},
}
STCast: Adaptive Boundary Alignment for Global and Regional Weather Forecasting: Hao Chen,

Tao Han,

Jie Zhang,

Song Guo,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Hao and Han, Tao and Zhang, Jie and Guo, Song and Bai, Lei},
  title     = {{STCast}: Adaptive Boundary Alignment for Global and Regional Weather Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20586--20596},
}
Resolving Endpoint Underfitting in Diffusion Bridges via Noise Alignment: Yurong Gao,

Zicheng Zhang,

Congying Han,

Tiande Guo,

Xinmin Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yurong and Zhang, Zicheng and Han, Congying and Guo, Tiande and Qiu, Xinmin},
  title     = {Resolving Endpoint Underfitting in Diffusion Bridges via Noise Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27388--27397},
}
Pointer-CAD: Unifying B-Rep and Command Sequences via Pointer-based Edges & Faces Selection: Dacheng Qi,

Chenyu Wang,

Jingwei Xu,

Tianzhe Chu,

Zibo Zhao,

Wen Liu,

Wenrui Ding,

Yi Ma,

Shenghua Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Dacheng and Wang, Chenyu and Xu, Jingwei and Chu, Tianzhe and Zhao, Zibo and Liu, Wen and Ding, Wenrui and Ma, Yi and Gao, Shenghua},
  title     = {{Pointer-CAD}: Unifying {B-Rep} and Command Sequences via Pointer-based Edges \& Faces Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17377--17387},
}
VarSplat: Uncertainty-aware 3D Gaussian Splatting for Robust RGB-D SLAM: Anh Thuan Tran,

Jana Kosecka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Anh Thuan and Kosecka, Jana},
  title     = {{VarSplat}: Uncertainty-aware {3D} {Gaussian} Splatting for Robust {RGB-D} {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26072--26082},
}
SafeDrive: Fine-Grained Safety Reasoning for End-to-End Driving in a Sparse World: Jungho Kim,

Jiyong Oh,

Seunghoon Yu,

Hongjae Shin,

Donghyuk Kwak,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jungho and Oh, Jiyong and Yu, Seunghoon and Shin, Hongjae and Kwak, Donghyuk and Choi, Jun Won},
  title     = {{SafeDrive}: Fine-Grained Safety Reasoning for End-to-End Driving in a Sparse World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24854--24864},
}
CryoKRAQEN: Kernel-Regularized Annealing for Quantized Embedding Networks in Cryo-EM Heterogeneous Reconstruction: Wenyuan Gao,

Yutan Wu,

Xuming He; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Wenyuan and Wu, Yutan and He, Xuming},
  title     = {{CryoKRAQEN}: Kernel-Regularized Annealing for Quantized Embedding Networks in {Cryo-EM} Heterogeneous Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28298--28307},
}
mVLM: A Vision Language Model for mNPUs: Zijie Chen,

Guiyun Fan,

Zhaoxing Yang,

Rong Ding,

Haiming Jin; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zijie and Fan, Guiyun and Yang, Zhaoxing and Ding, Rong and Jin, Haiming},
  title     = {{mVLM}: A Vision Language Model for {mNPUs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18892--18902},
}
Decompose and Transfer: CoT-Prompting Enhanced Alignment for Open-Vocabulary Temporal Action Detection: Sa Zhu,

Wanqian Zhang,

Lin Wang,

Xiaohua Chen,

Chenxu Cui,

Jinchao Zhang,

Bo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Sa and Zhang, Wanqian and Wang, Lin and Chen, Xiaohua and Cui, Chenxu and Zhang, Jinchao and Li, Bo},
  title     = {Decompose and Transfer: {CoT}-Prompting Enhanced Alignment for Open-Vocabulary Temporal Action Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20334--20344},
}
SliderEdit: Continuous Image Editing with Fine-Grained Instruction Control: Arman Zarei,

Samyadeep Basu,

Mobina Pournemat,

Sayan Nag,

Ryan A. Rossi,

Soheil Feizi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zarei_2026_CVPR,
  author    = {Zarei, Arman and Basu, Samyadeep and Pournemat, Mobina and Nag, Sayan and Rossi, Ryan A. and Feizi, Soheil},
  title     = {{SliderEdit}: Continuous Image Editing with Fine-Grained Instruction Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14430--14439},
}
Reinforcement-Guided Synthetic Data Generation for Privacy-Sensitive Identity Recognition: Xuemei Jia,

Jiawei Du,

Hui Wei,

Jun Chen,

Joey Tianyi Zhou,

Zheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Xuemei and Du, Jiawei and Wei, Hui and Chen, Jun and Zhou, Joey Tianyi and Wang, Zheng},
  title     = {Reinforcement-Guided Synthetic Data Generation for Privacy-Sensitive Identity Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20034--20044},
}
Faithful Contouring: Near-Lossless 3D Voxel Representation Free from Iso-surface: Yihao Luo,

Xianglong He,

Chuanyu Pan,

Yiwen Chen,

Jiaqi Wu,

Yangguang Li,

Wanli Ouyang,

Yuanming Hu,

Guang Yang,

ChoonHwai Yap; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yihao and He, Xianglong and Pan, Chuanyu and Chen, Yiwen and Wu, Jiaqi and Li, Yangguang and Ouyang, Wanli and Hu, Yuanming and Yang, Guang and Yap, ChoonHwai},
  title     = {Faithful Contouring: Near-Lossless {3D} Voxel Representation Free from Iso-surface},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14408--14418},
}
Image Generation from Contextually-Contradictory Prompts: Saar Huberman,

Or Patashnik,

Omer Dahary,

Ron Mokady,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huberman_2026_CVPR,
  author    = {Huberman, Saar and Patashnik, Or and Dahary, Omer and Mokady, Ron and Cohen-Or, Daniel},
  title     = {Image Generation from Contextually-Contradictory Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14885--14894},
}
An Empirical Study on How Video-LLMs Answer Video Questions: Chenhui Gou,

Ziyu Ma,

Zicheng Duan,

Haoyu He,

Feng Chen,

Akide Liu,

Bohan Zhuang,

Jianfei Cai,

Hamid Rezatofighi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gou_2026_CVPR,
  author    = {Gou, Chenhui and Ma, Ziyu and Duan, Zicheng and He, Haoyu and Chen, Feng and Liu, Akide and Zhuang, Bohan and Cai, Jianfei and Rezatofighi, Hamid},
  title     = {An Empirical Study on How {Video-LLMs} Answer Video Questions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18587--18597},
}
Robust Spiking Neural Networks by Temporal Mutual Information: Mengting Xu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mengting and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {Robust Spiking Neural Networks by Temporal Mutual Information},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20711--20720},
}
GaussFusion: Improving 3D Reconstruction in the Wild with A Geometry-Informed Video Generator: Liyuan Zhu,

Manjunath Narayana,

Michal Stary,

Will Hutchcroft,

Gordon Wetzstein,

Iro Armeni; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Liyuan and Narayana, Manjunath and Stary, Michal and Hutchcroft, Will and Wetzstein, Gordon and Armeni, Iro},
  title     = {{GaussFusion}: Improving {3D} Reconstruction in the Wild with A Geometry-Informed Video Generator},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15432--15442},
}
ShelfOcc: Native 3D Supervision beyond LiDAR for Vision-Based Occupancy Estimation: Simon Boeder,

Fabian Gigengack,

Simon Roesler,

Holger Caesar,

Benjamin Risse; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boeder_2026_CVPR,
  author    = {Boeder, Simon and Gigengack, Fabian and Roesler, Simon and Caesar, Holger and Risse, Benjamin},
  title     = {{ShelfOcc}: Native {3D} Supervision beyond {LiDAR} for Vision-Based Occupancy Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28620--28631},
}
CUBic: Coordinated Unified Bimanual Perception and Control Framework: Xingyu Wang,

Pengxiang Ding,

Jingkai Xu,

Donglin Wang,

Zhaoxin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xingyu and Ding, Pengxiang and Xu, Jingkai and Wang, Donglin and Fan, Zhaoxin},
  title     = {{CUBic}: Coordinated Unified Bimanual Perception and Control Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20856--20866},
}
Unposed-to-3D: Learning Simulation-Ready Vehicles from Real-World Images: Hongyuan Liu,

Bochao Zou,

Qiankun Liu,

Haochen Yu,

Qi Mei,

Jianfei Jiang,

Chen Liu,

Cheng Bi,

Zhao Wang,

Xueyang Zhang,

Yifei Zhan,

Jiansheng Chen,

Huimin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hongyuan and Zou, Bochao and Liu, Qiankun and Yu, Haochen and Mei, Qi and Jiang, Jianfei and Liu, Chen and Bi, Cheng and Wang, Zhao and Zhang, Xueyang and Zhan, Yifei and Chen, Jiansheng and Ma, Huimin},
  title     = {{Unposed-to-3D}: Learning Simulation-Ready Vehicles from Real-World Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24843--24853},
}
SketchAssist: A Practical Assistant for Semantic Edits and Precise Local Redrawing: Han Zou,

Yan Zhang,

Ruiqi Yu,

Cong Xie,

Jie Huang,

Zhenpeng Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Han and Zhang, Yan and Yu, Ruiqi and Xie, Cong and Huang, Jie and Zhan, Zhenpeng},
  title     = {{SketchAssist}: A Practical Assistant for Semantic Edits and Precise Local Redrawing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16258--16267},
}
Continuous Exposure-Time Modeling for Realistic Atmospheric Turbulence Synthesis: Junwei Zeng,

Dong Liang,

Sheng-Jun Huang,

Kun Zhan,

Songcan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Junwei and Liang, Dong and Huang, Sheng-Jun and Zhan, Kun and Chen, Songcan},
  title     = {Continuous Exposure-Time Modeling for Realistic Atmospheric Turbulence Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26678--26687},
}
DiffBMP: Differentiable Rendering with Bitmap Primitives: Seongmin Hong,

Junghun James Kim,

Daehyeop Kim,

Insoo Chung,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Seongmin and Kim, Junghun James and Kim, Daehyeop and Chung, Insoo and Chun, Se Young},
  title     = {{DiffBMP}: Differentiable Rendering with Bitmap Primitives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26741--26750},
}
Distribution-Aligned Multimodal Fusion for Robust Object Detection: Xiaohui Hao,

Yanglin Pu,

Yongjun Wang,

Rui She; [pdf]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Xiaohui and Pu, Yanglin and Wang, Yongjun and She, Rui},
  title     = {Distribution-Aligned Multimodal Fusion for Robust Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25494--25503},
}
DRiffusion: Draft-and-Refine Process Parallelizes Diffusion Models with Ease: Runsheng Bai,

Chengyu Zhang,

Yangdong Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Runsheng and Zhang, Chengyu and Deng, Yangdong},
  title     = {{DRiffusion}: Draft-and-Refine Process Parallelizes Diffusion Models with Ease},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16551--16560},
}
From Selection to Scheduling: Federated Geometry-Aware Correction Makes Exemplar Replay Work Better under Continual Dynamic Heterogeneity: Zhuang Qi,

Ying-Peng Tang,

Lei Meng,

Guoqing Chao,

Lei Wu,

Han Yu,

Xiangxu Meng; [pdf] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Zhuang and Tang, Ying-Peng and Meng, Lei and Chao, Guoqing and Wu, Lei and Yu, Han and Meng, Xiangxu},
  title     = {From Selection to Scheduling: Federated Geometry-Aware Correction Makes Exemplar Replay Work Better under Continual Dynamic Heterogeneity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17494--17504},
}
SpeeDe3DGS: Speedy Deformable 3D Gaussian Splatting with Temporal Pruning and Motion Grouping: Allen Tu,

Haiyang Ying,

Alex Hanson,

Yonghan Lee,

Tom Goldstein,

Matthias Zwicker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Allen and Ying, Haiyang and Hanson, Alex and Lee, Yonghan and Goldstein, Tom and Zwicker, Matthias},
  title     = {{SpeeDe3DGS}: Speedy Deformable {3D} {Gaussian} Splatting with Temporal Pruning and Motion Grouping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26083--26093},
}
SRA-Det: Learning Omni-Grained Open-Vocabulary Detection Beyond Category Names: Li Yang,

Boyu Cai,

Wei Liu,

Yan Wang,

Chunfeng Yuan,

Bing Li,

Weiming Hu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Li and Cai, Boyu and Liu, Wei and Wang, Yan and Yuan, Chunfeng and Li, Bing and Hu, Weiming},
  title     = {{SRA-Det}: Learning Omni-Grained Open-Vocabulary Detection Beyond Category Names},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27611--27620},
}
AsymLoc: Towards Asymmetric Feature Matching for Efficient Visual Localization: Mohammad Omama,

Gabriele Berton,

Eric Foxlin,

Yelin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Omama_2026_CVPR,
  author    = {Omama, Mohammad and Berton, Gabriele and Foxlin, Eric and Kim, Yelin},
  title     = {{AsymLoc}: Towards Asymmetric Feature Matching for Efficient Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26441--26451},
}
PlannerRFT: Reinforcing Diffusion Planners through Closed-Loop and Sample-Efficient Fine-Tuning: Hongchen Li,

Tianyu Li,

Jiazhi Yang,

Mingyang Shang,

Gaoqiang Wu,

Caojun Wang,

Haochen Tian,

Zengrong Lin,

Zhihui Hao,

XianPeng Lang,

Jia Hu,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongchen and Li, Tianyu and Yang, Jiazhi and Shang, Mingyang and Wu, Gaoqiang and Wang, Caojun and Tian, Haochen and Lin, Zengrong and Hao, Zhihui and Lang, XianPeng and Hu, Jia and Li, Hongyang},
  title     = {{PlannerRFT}: Reinforcing Diffusion Planners through Closed-Loop and Sample-Efficient Fine-Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24929--24938},
}
ArtiMuse: Fine-Grained Image Aesthetics Assessment with Joint Scoring and Expert-Level Understanding: Shuo Cao,

Nan Ma,

Jiayang Li,

Xiaohui Li,

Lihao Shao,

Kaiwen Zhu,

Yu Zhou,

Yuandong Pu,

Jiarui Wu,

Jiaquan Wang,

Bo Qu,

Wenhai Wang,

Yu Qiao,

Dajuin Yao,

Yihao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Shuo and Ma, Nan and Li, Jiayang and Li, Xiaohui and Shao, Lihao and Zhu, Kaiwen and Zhou, Yu and Pu, Yuandong and Wu, Jiarui and Wang, Jiaquan and Qu, Bo and Wang, Wenhai and Qiao, Yu and Yao, Dajuin and Liu, Yihao},
  title     = {{ArtiMuse}: Fine-Grained Image Aesthetics Assessment with Joint Scoring and Expert-Level Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15313--15322},
}
Unifying Precise Keyframes and Semantic Control via Multi-level Diffusion: Linjun Wu,

Jiejia Yu,

Leyang Jin,

He Wang,

Bowen Zheng,

Xu Yang,

Hao Jiang,

Fei Xia,

Fei Ling,

Jun Deng,

Xiaogang Jin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Linjun and Yu, Jiejia and Jin, Leyang and Wang, He and Zheng, Bowen and Yang, Xu and Jiang, Hao and Xia, Fei and Ling, Fei and Deng, Jun and Jin, Xiaogang},
  title     = {Unifying Precise Keyframes and Semantic Control via Multi-level Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23473--23483},
}
DUO-VSR: Dual-Stream Distillation for One-Step Video Super-Resolution: Zhengyao Lv,

Menghan Xia,

Xintao Wang,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Zhengyao and Xia, Menghan and Wang, Xintao and Wong, Kwan-Yee K.},
  title     = {{DUO-VSR}: Dual-Stream Distillation for One-Step Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16333--16344},
}
BUSSARD: Normalizing Flows for Bijective Universal Scene-Specific Anomalous Relationship Detection: Melissa Schween,

Mathis Kruse,

Bodo Rosenhahn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schween_2026_CVPR,
  author    = {Schween, Melissa and Kruse, Mathis and Rosenhahn, Bodo},
  title     = {{BUSSARD}: Normalizing Flows for Bijective Universal Scene-Specific Anomalous Relationship Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28512--28523},
}
Energy Waveify and Redistribution for Test-Time Adaptation: A Control System Perspective: Zhenbin Wang,

Lei Zhang,

Lituan Wang,

Zhenwei Zhang,

Guangwu Qian,

Yan Wang,

Wei Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhenbin and Zhang, Lei and Wang, Lituan and Zhang, Zhenwei and Qian, Guangwu and Wang, Yan and Huang, Wei},
  title     = {Energy Waveify and Redistribution for Test-Time Adaptation: A Control System Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15040--15049},
}
InfiniBench: Infinite Benchmarking for Visual Spatial Reasoning with Customizable Scene Complexity: Haoming Wang,

Qiyao Xue,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Haoming and Xue, Qiyao and Gao, Wei},
  title     = {{InfiniBench}: Infinite Benchmarking for Visual Spatial Reasoning with Customizable Scene Complexity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21594--21605},
}
Probabilistic Concept Graph Reasoning for Multimodal Misinformation Detection: Ruichao Yang,

Wei Gao,

Xiaobin Zhu,

Jing Ma,

Hongzhan Lin,

Ziyang Luo,

Bo-Wen Zhang,

Xu-Cheng Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ruichao and Gao, Wei and Zhu, Xiaobin and Ma, Jing and Lin, Hongzhan and Luo, Ziyang and Zhang, Bo-Wen and Yin, Xu-Cheng},
  title     = {Probabilistic Concept Graph Reasoning for Multimodal Misinformation Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19107--19118},
}
AffordGen: Generating Diverse Demonstrations for Generalizable Object Manipulation with Affordance Correspondence: Jiawei Zhang,

Kaizhe Hu,

Yingqian Huang,

Yuanchen Ju,

Zhengrong Xue,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiawei and Hu, Kaizhe and Huang, Yingqian and Ju, Yuanchen and Xue, Zhengrong and Xu, Huazhe},
  title     = {{AffordGen}: Generating Diverse Demonstrations for Generalizable Object Manipulation with Affordance Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15966--15975},
}
QuCNet: Quantum Deep Learning Driven Multi-Circuit Network for Remote Sensing Image Classification: Komal Komal,

Mukul Gupta,

Saumya Singh,

Santosh Kumar Vipparthi,

C.C. Reddy,

Subrahmanyam Murala; [pdf] [supp]
[bibtex]
@InProceedings{Komal_2026_CVPR,
  author    = {Komal, Komal and Gupta, Mukul and Singh, Saumya and Vipparthi, Santosh Kumar and Reddy, C. C. and Murala, Subrahmanyam},
  title     = {{QuCNet}: Quantum Deep Learning Driven Multi-Circuit Network for Remote Sensing Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20253--20262},
}
Rethinking MLLM Itself as a Segmenter with a Single Segmentation Token: Anqi Zhang,

Xiaokang Ji,

Guangyu Gao,

Jianbo Jiao,

Chi Harold Liu,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Anqi and Ji, Xiaokang and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao},
  title     = {Rethinking {MLLM} Itself as a Segmenter with a Single Segmentation Token},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19196--19207},
}
Nano-EmoX: Unifying Multimodal Emotional Intelligence from Perception to Empathy: Jiahao Huang,

Fengyan Lin,

Xuechao Yang,

Chen Feng,

Kexin Zhu,

Xu Yang,

Zhide Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiahao and Lin, Fengyan and Yang, Xuechao and Feng, Chen and Zhu, Kexin and Yang, Xu and Chen, Zhide},
  title     = {{Nano-EmoX}: Unifying Multimodal Emotional Intelligence from Perception to Empathy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22986--22997},
}
Beyond Geometry: Artistic Disparity Synthesis for Immersive 2D-to-3D: Ping Chen,

Zezhou Chen,

Xingpeng Zhang,

Yanlin Qian,

Huan Hu,

Xiang Liu,

Zipeng Wang,

Xin Wang,

Zhaoxiang Liu,

Kai Wang,

Shiguo Lian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ping and Chen, Zezhou and Zhang, Xingpeng and Qian, Yanlin and Hu, Huan and Liu, Xiang and Wang, Zipeng and Wang, Xin and Liu, Zhaoxiang and Wang, Kai and Lian, Shiguo},
  title     = {Beyond Geometry: Artistic Disparity Synthesis for Immersive {2D-to-3D}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27114--27123},
}
ReAlign: Generalizable Image Forgery Detection via Reasoning-Aligned Representation: Qing Huang,

Zhipei Xu,

Xuanyu Zhang,

Xiangyu Yu,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian},
  title     = {{ReAlign}: Generalizable Image Forgery Detection via Reasoning-Aligned Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21295--21305},
}
Video-Only ToM: Enhancing Theory of Mind in Multimodal Large Language Models: Siqi Liu,

Xinyang Li,

Bochao Zou,

Junbao Zhuo,

Huimin Ma,

Jiansheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Siqi and Li, Xinyang and Zou, Bochao and Zhuo, Junbao and Ma, Huimin and Chen, Jiansheng},
  title     = {Video-Only {ToM}: Enhancing Theory of Mind in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19208--19218},
}
FastGS: Training 3D Gaussian Splatting in 100 Seconds: Shiwei Ren,

Tianci Wen,

Yongchun Fang,

Biao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Shiwei and Wen, Tianci and Fang, Yongchun and Lu, Biao},
  title     = {{FastGS}: Training {3D} {Gaussian} Splatting in 100 Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26094--26103},
}
Edit-As-Act: Goal-Regressive Planning for Open-Vocabulary 3D Indoor Scene Editing: Seongrae Noh,

SeungWon Seo,

Gyeong-Moon Park,

HyeongYeop Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noh_2026_CVPR,
  author    = {Noh, Seongrae and Seo, SeungWon and Park, Gyeong-Moon and Kang, HyeongYeop},
  title     = {{Edit-As-Act}: Goal-Regressive Planning for Open-Vocabulary {3D} Indoor Scene Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19864--19873},
}
Continual Learning for fMRI-Based Brain Disorder Diagnosis via Functional Connectivity Matrices Generative Replay: Qianyu Chen,

Shujian Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Qianyu and Yu, Shujian},
  title     = {Continual Learning for {fMRI}-Based Brain Disorder Diagnosis via Functional Connectivity Matrices Generative Replay},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25099--25109},
}
B$^3$-Seg: Camera-Free, Training-Free 3DGS Segmentation via Analytic EIG and Beta-Bernoulli Bayesian Updates: Hiromichi Kamata,

Samuel Arthur Munro,

Fuminori Homma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kamata_2026_CVPR,
  author    = {Kamata, Hiromichi and Munro, Samuel Arthur and Homma, Fuminori},
  title     = {{B$^3$-Seg}: Camera-Free, Training-Free {3DGS} Segmentation via Analytic {EIG} and {Beta-Bernoulli} {Bayesian} Updates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26867--26876},
}
Improving Text-to-Image Generation with Intrinsic Self-Confidence Rewards: Seungwook Kim,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seungwook and Cho, Minsu},
  title     = {Improving Text-to-Image Generation with Intrinsic Self-Confidence Rewards},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14832--14843},
}
GeoViS: Geospatially Rewarded Visual Search for Remote Sensing Visual Grounding: Peirong Zhang,

Yidan Zhang,

Luxiao Xu,

Jinliang Lin,

Zonghao Guo,

Fengxiang Wang,

Xue Yang,

Kaiwen Wei,

Lei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peirong and Zhang, Yidan and Xu, Luxiao and Lin, Jinliang and Guo, Zonghao and Wang, Fengxiang and Yang, Xue and Wei, Kaiwen and Wang, Lei},
  title     = {{GeoViS}: Geospatially Rewarded Visual Search for Remote Sensing Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14335--14345},
}
OccuFly: A 3D Vision Benchmark for Semantic Scene Completion from the Aerial Perspective: Markus Gross,

Sai B. Matha,

Aya Fahmy,

Rui Song,

Daniel Cremers,

Henri Meeß; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gross_2026_CVPR,
  author    = {Gross, Markus and Matha, Sai B. and Fahmy, Aya and Song, Rui and Cremers, Daniel and Mee{\ss}, Henri},
  title     = {{OccuFly}: A {3D} Vision Benchmark for Semantic Scene Completion from the Aerial Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21474--21485},
}
Dynamic Stream Network for Combinatorial Explosion Problem in Deformable Medical Image Registration: Shaochen Bi,

Yuting He,

Weiming Wang,

Hao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
  author    = {Bi, Shaochen and He, Yuting and Wang, Weiming and Chen, Hao},
  title     = {Dynamic Stream Network for Combinatorial Explosion Problem in Deformable Medical Image Registration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15649--15658},
}
MR-RAG: Multimodal Relevance-Aware Retrieval-Augmented Generation for Medical Visual Question Answering: Xuze Li,

Haozhao Wang,

Zhenyu Huang,

Zhongxu Wang,

Jinghua Zhang,

Ruixuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuze and Wang, Haozhao and Huang, Zhenyu and Wang, Zhongxu and Zhang, Jinghua and Li, Ruixuan},
  title     = {{MR-RAG}: Multimodal Relevance-Aware Retrieval-Augmented Generation for Medical Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15010--15019},
}
OctoMed: Data Recipes for State-of-the-Art Multimodal Medical Reasoning: Timothy Ossowski,

Sheng Zhang,

Qianchu Liu,

Guanghui Qin,

Reuben Tan,

Tristan Naumann,

Junjie Hu,

Hoifung Poon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ossowski_2026_CVPR,
  author    = {Ossowski, Timothy and Zhang, Sheng and Liu, Qianchu and Qin, Guanghui and Tan, Reuben and Naumann, Tristan and Hu, Junjie and Poon, Hoifung},
  title     = {{OctoMed}: Data Recipes for State-of-the-Art Multimodal Medical Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26251--26261},
}
Spatial-SAM: Spatially Consistent 3D Electron Microscopy Segmentation with SDF Memory and Semi-Supervised Learning: Yikai Huang,

Renmin Han,

Yuxuan Wang,

Youcheng Cai,

Ligang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yikai and Han, Renmin and Wang, Yuxuan and Cai, Youcheng and Liu, Ligang},
  title     = {{Spatial-SAM}: Spatially Consistent {3D} Electron Microscopy Segmentation with {SDF} Memory and Semi-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22763--22772},
}
Bias at the End of the Score: Salma Abdel Magid,

Grace Guo,

Esin Tureci,

Amaya Dharmasiri,

Vikram V. Ramaswamy,

Hanspeter Pfister,

Olga Russakovsky; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magid_2026_CVPR,
  author    = {Magid, Salma Abdel and Guo, Grace and Tureci, Esin and Dharmasiri, Amaya and Ramaswamy, Vikram V. and Pfister, Hanspeter and Russakovsky, Olga},
  title     = {Bias at the End of the Score},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24460--24470},
}
Similarity-as-Evidence: Calibrating Overconfident VLMs for Interpretable and Label-Efficient Medical Active Learning: Zhuofan Xie,

Zishan Lin,

Jinliang Lin,

Jie Qi,

Shaohua Hong,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Zhuofan and Lin, Zishan and Lin, Jinliang and Qi, Jie and Hong, Shaohua and Li, Shuo},
  title     = {{Similarity-as-Evidence}: Calibrating Overconfident {VLMs} for Interpretable and Label-Efficient Medical Active Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20973--20984},
}
Ground Reaction Inertial Poser: Physics-based Human Motion Capture from Sparse IMUs and Insole Pressure Sensors: Ryosuke Hori,

Jyun-Ting Song,

Zhengyi Luo,

Jinkun Cao,

Soyong Shin,

Hideo Saito,

Kris Kitani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hori_2026_CVPR,
  author    = {Hori, Ryosuke and Song, Jyun-Ting and Luo, Zhengyi and Cao, Jinkun and Shin, Soyong and Saito, Hideo and Kitani, Kris},
  title     = {Ground Reaction Inertial Poser: Physics-based Human Motion Capture from Sparse {IMUs} and Insole Pressure Sensors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28435--28445},
}
Monocular Open Vocabulary Occupancy Prediction for Indoor Scenes: Changqing Zhou,

Yueru Luo,

Han Zhang,

Zeyu Jiang,

Changhao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Changqing and Luo, Yueru and Zhang, Han and Jiang, Zeyu and Chen, Changhao},
  title     = {Monocular Open Vocabulary Occupancy Prediction for Indoor Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21627--21637},
}
Kaleidoscopic Scintillation Event Imaging: Alex Bocchieri,

John Mamish,

David Appleyard,

Andreas Velten; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bocchieri_2026_CVPR,
  author    = {Bocchieri, Alex and Mamish, John and Appleyard, David and Velten, Andreas},
  title     = {Kaleidoscopic Scintillation Event Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19749--19758},
}
Select Less, Reason More: Prioritizing Evidence Purity for Video Reasoning: Xuchen Li,

Xuzhao Li,

Shiyu Hu,

Kaiqi Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuchen and Li, Xuzhao and Hu, Shiyu and Huang, Kaiqi},
  title     = {Select Less, Reason More: Prioritizing Evidence Purity for Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25621--25632},
}
MultiShotMaster: A Controllable Multi-Shot Video Generation Framework: Qinghe Wang,

Xiaoyu Shi,

Baolu Li,

Weikang Bian,

Quande Liu,

Huchuan Lu,

Xintao Wang,

Pengfei Wan,

Kun Gai,

Xu Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qinghe and Shi, Xiaoyu and Li, Baolu and Bian, Weikang and Liu, Quande and Lu, Huchuan and Wang, Xintao and Wan, Pengfei and Gai, Kun and Jia, Xu},
  title     = {{MultiShotMaster}: A Controllable Multi-Shot Video Generation Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16268--16278},
}
NERFIFY: A Multi-Agent Framework for Turning NeRF Papers into Code: Seemandhar Jain,

Keshav Gupta,

Kunal Gupta,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Seemandhar and Gupta, Keshav and Gupta, Kunal and Chandraker, Manmohan},
  title     = {{NERFIFY}: A Multi-Agent Framework for Turning {NeRF} Papers into Code},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24384--24394},
}
Gated Condition Injection without Multimodal Attention: Towards Controllable Linear-Attention Transformers: Yuhe Liu,

Zhenxiong Tan,

Yujia Hu,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuhe and Tan, Zhenxiong and Hu, Yujia and Liu, Songhua and Wang, Xinchao},
  title     = {Gated Condition Injection without Multimodal Attention: Towards Controllable Linear-Attention Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23388--23397},
}
Unleashing the Power of Chain-of-Prediction for Monocular 3D Object Detection: Zhihao Zhang,

Abhinav Kumar,

Girish Chandar Ganesan,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhihao and Kumar, Abhinav and Ganesan, Girish Chandar and Liu, Xiaoming},
  title     = {Unleashing the Power of Chain-of-Prediction for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18755--18765},
}
Any4D: Unified Feed-Forward Metric 4D Reconstruction: Jay Karhade,

Nikhil Keetha,

Yuchen Zhang,

Tanisha Gupta,

Akash Sharma,

Sebastian Scherer,

Deva Ramanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karhade_2026_CVPR,
  author    = {Karhade, Jay and Keetha, Nikhil and Zhang, Yuchen and Gupta, Tanisha and Sharma, Akash and Scherer, Sebastian and Ramanan, Deva},
  title     = {{Any4D}: Unified Feed-Forward Metric {4D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14578--14589},
}
It Takes Two: A Duet of Periodicity and Directionality for Burst Flicker Removal: Lishen Qu,

Shihao Zhou,

Jie Liang,

Hui Zeng,

Lei Zhang,

Jufeng Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Lishen and Zhou, Shihao and Liang, Jie and Zeng, Hui and Zhang, Lei and Yang, Jufeng},
  title     = {It Takes Two: A Duet of Periodicity and Directionality for Burst Flicker Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15516--15527},
}
DiffuView: Multi-View Diffusion Pretraining for 3D Aware Robotic Manipulation: Kaizhao Zhang,

Tian Niu,

Tianyu Liu,

Chenen Guo,

Zijun Xu,

Qingda Hu,

Wenchao Ding; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaizhao and Niu, Tian and Liu, Tianyu and Guo, Chenen and Xu, Zijun and Hu, Qingda and Ding, Wenchao},
  title     = {{DiffuView}: Multi-View Diffusion Pretraining for {3D} Aware Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23601--23611},
}
Learning from Noisy Supervision: A Denoising-Debiasing Framework for Weakly Supervised Video Anomaly Detection: Yaxin Zhao,

Yang Wang,

Wenya Guo,

Sihan Xu,

Xiangrui Cai,

Xi Lin,

Ying Zhang,

Xiaojie Yuan; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yaxin and Wang, Yang and Guo, Wenya and Xu, Sihan and Cai, Xiangrui and Lin, Xi and Zhang, Ying and Yuan, Xiaojie},
  title     = {Learning from Noisy Supervision: A Denoising-Debiasing Framework for Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21326--21335},
}
Revisiting Pose Sensitivity in Splat-based Computed Tomography under Sparse-view Reconstruction: Kiseok Choi,

Hyeongjun Cho,

Inchul Kim,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Kiseok and Cho, Hyeongjun and Kim, Inchul and Kim, Min H.},
  title     = {Revisiting Pose Sensitivity in Splat-based Computed Tomography under Sparse-view Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25969--25978},
}
Real2Edit2Real: Generating Robotic Demonstrations via a 3D Control Interface: Yujie Zhao,

Hongwei Fan,

Di Chen,

Shengcong Chen,

Liliang Chen,

Xiaoqi Li,

Guanghui Ren,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yujie and Fan, Hongwei and Chen, Di and Chen, Shengcong and Chen, Liliang and Li, Xiaoqi and Ren, Guanghui and Dong, Hao},
  title     = {{Real2Edit2Real}: Generating Robotic Demonstrations via a {3D} Control Interface},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23106--23116},
}
Chart-FR1: Visual Focus-Driven Fine-Grained Reasoning on Dense Charts: Hongkun Pan,

Yuwei Wu,

Wanyi Hong,

Shenghui Hu,

Qitong Yan,

Yi Yang,

Rufei Han,

Changju Zhou,

Minfeng Zhu,

Dongming Han,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Hongkun and Wu, Yuwei and Hong, Wanyi and Hu, Shenghui and Yan, Qitong and Yang, Yi and Han, Rufei and Zhou, Changju and Zhu, Minfeng and Han, Dongming and Chen, Wei},
  title     = {{Chart-FR1}: Visual Focus-Driven Fine-Grained Reasoning on Dense Charts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26285--26294},
}
Disentangle-then-Align: Non-Iterative Hybrid Multimodal Image Registration via Cross-Scale Feature Disentanglement: Chunlei Zhang,

Jiahao Xia,

Yun Xiao,

Bo Jiang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chunlei and Xia, Jiahao and Xiao, Yun and Jiang, Bo and Zhang, Jian},
  title     = {{Disentangle-then-Align}: Non-Iterative Hybrid Multimodal Image Registration via Cross-Scale Feature Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15912--15921},
}
PromptDepth: Efficient and Promptable Geometric 3D Vision Model for Embodied Intelligence: Xianyun Wang,

Jiaxu Miao,

Tian Xu,

Siyuan Wang,

Yuehao Li,

Haoyang Hu,

Jun Xiao,

Yonghong Tian,

Jun Yu; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xianyun and Miao, Jiaxu and Xu, Tian and Wang, Siyuan and Li, Yuehao and Hu, Haoyang and Xiao, Jun and Tian, Yonghong and Yu, Jun},
  title     = {{PromptDepth}: Efficient and Promptable Geometric {3D} Vision Model for Embodied Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28074--28085},
}
Residual Decoding: Mitigating Hallucinations in Large Vision-Language Models via History-Aware Residual Guidance: Xinrong Chen,

Xu Chu,

Yingmin Qiu,

Hengyuan Zhang,

Jing Xiong,

Shiyu Tang,

Shuai Liu,

Shaokang Yang,

Cheng Yang,

Hayden Kwok-Hay So,

Ngai Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xinrong and Chu, Xu and Qiu, Yingmin and Zhang, Hengyuan and Xiong, Jing and Tang, Shiyu and Liu, Shuai and Yang, Shaokang and Yang, Cheng and So, Hayden Kwok-Hay and Wong, Ngai},
  title     = {Residual Decoding: Mitigating Hallucinations in Large Vision-Language Models via History-Aware Residual Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25281--25292},
}
PoInit-of-View: Poisoning Initialization of Views Transfers Across Multiple 3D Reconstruction Systems: Weijie Wang,

Songlong Xing,

Zhengyu Zhao,

Nicu Sebe,

Bruno Lepri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Weijie and Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu and Lepri, Bruno},
  title     = {{PoInit-of-View}: Poisoning Initialization of Views Transfers Across Multiple {3D} Reconstruction Systems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20670--20679},
}
CARE: A Molecular-Guided Foundation Model with Adaptive Region Modeling for Whole Slide Image Analysis: Di Zhang,

Zhangpeng Gong,

Xiaobo Pang,

Jiashuai Liu,

Junbo Lu,

Hao Cui,

Jiusong Ge,

Zhi Zeng,

Kai Yi,

Yinghua Li,

Si Liu,

Tingsong Yu,

Haoran Wang,

Mireia Crispin-Ortuzar,

Weimiao Yu,

Chen Li,

Zeyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Di and Gong, Zhangpeng and Pang, Xiaobo and Liu, Jiashuai and Lu, Junbo and Cui, Hao and Ge, Jiusong and Zeng, Zhi and Yi, Kai and Li, Yinghua and Liu, Si and Yu, Tingsong and Wang, Haoran and Crispin-Ortuzar, Mireia and Yu, Weimiao and Li, Chen and Gao, Zeyu},
  title     = {{CARE}: A Molecular-Guided Foundation Model with Adaptive Region Modeling for Whole Slide Image Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21078--21088},
}
A2GC: Asymmetric Aggregation with Geometric Constraints for Locally Aggregated Descriptors: Zhenyu Li,

Tianyi Shang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhenyu and Shang, Tianyi},
  title     = {{A2GC}: Asymmetric Aggregation with Geometric Constraints for Locally Aggregated Descriptors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19423--19431},
}
Decoupling Stability and Plasticity for Multi-Modal Test-Time Adaptation: Yongbo He,

Zirun Guo,

Tao Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yongbo and Guo, Zirun and Jin, Tao},
  title     = {Decoupling Stability and Plasticity for Multi-Modal Test-Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15020--15029},
}
MedLoc-R1: Performance-Aware Curriculum Reward Scheduling for GRPO-Based Medical Visual Grounding: Guangjing Yang,

Ziyuan Qin,

Chaoran Zhang,

Chenlin Du,

Jinglin Wang,

Wanran Sun,

Zhenyu Zhang,

Bing Ji,

Qicheng Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Guangjing and Qin, Ziyuan and Zhang, Chaoran and Du, Chenlin and Wang, Jinglin and Sun, Wanran and Zhang, Zhenyu and Ji, Bing and Lao, Qicheng},
  title     = {{MedLoc-R1}: Performance-Aware Curriculum Reward Scheduling for {GRPO}-Based Medical Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21036--21045},
}
One Layer's Trash is Another Layer's Treasure: Adaptive Layer-wise Visual Token Selection in LVLMs: Yongru Chen,

Kai Zhang,

Zeliang Zong,

Yuchen Lu,

Wenming Tan,

Ye Ren,

Jilin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yongru and Zhang, Kai and Zong, Zeliang and Lu, Yuchen and Tan, Wenming and Ren, Ye and Hu, Jilin},
  title     = {One Layer's Trash is Another Layer's Treasure: Adaptive Layer-wise Visual Token Selection in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17672--17681},
}
R2-Seg: Training-Free OOD Medical Tumor Segmentation via Anatomical Reasoning and Statistical Rejection: Shuaike Shen,

Ke Liu,

Jiaqing Xie,

Shangde Gao,

Chunhua Shen,

Ge Liu,

Mireia Crispin-Ortuzar,

Shangqi Gao; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Shuaike and Liu, Ke and Xie, Jiaqing and Gao, Shangde and Shen, Chunhua and Liu, Ge and Crispin-Ortuzar, Mireia and Gao, Shangqi},
  title     = {{R2-Seg}: Training-Free {OOD} Medical Tumor Segmentation via Anatomical Reasoning and Statistical Rejection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21669--21678},
}
RecEdit-Drive: 3D Reconstruction-Guided Spatiotemporal Video Editing for Autonomous Driving Scenes: Yipeng Wu,

Xin Wang,

Chenghan Yang,

Chong Wang,

Dongdong Wu,

Wanchao Su,

Hengshuang Zhao,

Wei Feng,

Kairui Yang,

Di Lin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yipeng and Wang, Xin and Yang, Chenghan and Wang, Chong and Wu, Dongdong and Su, Wanchao and Zhao, Hengshuang and Feng, Wei and Yang, Kairui and Lin, Di},
  title     = {{RecEdit-Drive}: {3D} Reconstruction-Guided Spatiotemporal Video Editing for Autonomous Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25415--25425},
}
Particulate: Feed-Forward 3D Object Articulation: Ruining Li,

Yuxin Yao,

Chuanxia Zheng,

Christian Rupprecht,

Joan Lasenby,

Shangzhe Wu,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ruining and Yao, Yuxin and Zheng, Chuanxia and Rupprecht, Christian and Lasenby, Joan and Wu, Shangzhe and Vedaldi, Andrea},
  title     = {Particulate: Feed-Forward {3D} Object Articulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27708--27718},
}
Revisiting Learning with Noisy Labels: Active Forgetting and Noise Suppression: Mengmeng Sheng,

Zeren Sun,

Tao Chen,

Jinshan Pan,

Yazhou Yao,

Fumin Shen; [pdf]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Mengmeng and Sun, Zeren and Chen, Tao and Pan, Jinshan and Yao, Yazhou and Shen, Fumin},
  title     = {Revisiting Learning with Noisy Labels: Active Forgetting and Noise Suppression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24792--24802},
}
Fully Decentralized Certified Unlearning: Hithem Lamri,

Michail Maniatakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lamri_2026_CVPR,
  author    = {Lamri, Hithem and Maniatakos, Michail},
  title     = {Fully Decentralized Certified Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24577--24586},
}
CG-Floor: Centroid-Guided Diffusion for Large-Scale Floorplan Generation: Hongjin Lian,

Jian Ma,

Hongjie Chen,

Jia Li,

Ruizhen Hu,

Yu-Kun Lai,

Kun Li; [pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Hongjin and Ma, Jian and Chen, Hongjie and Li, Jia and Hu, Ruizhen and Lai, Yu-Kun and Li, Kun},
  title     = {{CG-Floor}: Centroid-Guided Diffusion for Large-Scale Floorplan Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18354--18363},
}
CF-IPT: Cross-Modal Fusion Interactive Prompt Tuning of Vision-Language Pre-Trained Model for Multisource Remote Sensing Data Classification: Jinheng Ji,

Jiahui Qu,

Wenqian Dong,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Jinheng and Qu, Jiahui and Dong, Wenqian and Li, Yunsong},
  title     = {{CF-IPT}: Cross-Modal Fusion Interactive Prompt Tuning of Vision-Language Pre-Trained Model for Multisource Remote Sensing Data Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23021--23030},
}
Ego-1K - A Large-Scale Multiview Video Dataset for Egocentric Vision: Jae Yong Lee,

Daniel Scharstein,

Akash Bapat,

Hao Hu,

Andrew Fu,

Haoru Zhao,

Paul Sammut,

Xiang Li,

Stephen Jeapes,

Anik Gupta,

Lior David,

Saketh Madhuvarasu,

Jay Girish Joshi,

Jason Wither; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jae Yong and Scharstein, Daniel and Bapat, Akash and Hu, Hao and Fu, Andrew and Zhao, Haoru and Sammut, Paul and Li, Xiang and Jeapes, Stephen and Gupta, Anik and David, Lior and Madhuvarasu, Saketh and Joshi, Jay Girish and Wither, Jason},
  title     = {{Ego-1K} - A Large-Scale Multiview Video Dataset for Egocentric Vision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19854--19863},
}
LensWalk: Agentic Video Understanding by Planning How You See in Videos: Keliang Li,

Yansong Li,

Hongze Shen,

Mengdi Liu,

Hong Chang,

Shiguang Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Keliang and Li, Yansong and Shen, Hongze and Liu, Mengdi and Chang, Hong and Shan, Shiguang},
  title     = {{LensWalk}: Agentic Video Understanding by Planning How You See in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19518--19528},
}
Gaussian Mapping for Evolving Scenes: Vladimir Yugay,

Thies Kersten,

Luca Carlone,

Theo Gevers,

Martin R. Oswald,

Lukas Schmid; [pdf] [arXiv]
[bibtex]
@InProceedings{Yugay_2026_CVPR,
  author    = {Yugay, Vladimir and Kersten, Thies and Carlone, Luca and Gevers, Theo and Oswald, Martin R. and Schmid, Lukas},
  title     = {{Gaussian} Mapping for Evolving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18903--18912},
}
Memory-Efficient Transfer Learning with Fading Side Networks via Masked Dual Path Distillation: Yutong Zhang,

Jiaxin Chen,

Honglin Chen,

Kaiqi Zheng,

Shengcai Liao,

Hanwen Zhong,

Weixin Li,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yutong and Chen, Jiaxin and Chen, Honglin and Zheng, Kaiqi and Liao, Shengcai and Zhong, Hanwen and Li, Weixin and Wang, Yunhong},
  title     = {Memory-Efficient Transfer Learning with Fading Side Networks via Masked Dual Path Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25043--25054},
}
LAOF: Robust Latent Action Learning with Optical Flow Constraints: Xizhou Bu,

Jiexi Lyu,

Fulei Sun,

Ruichen Yang,

Zhiqiang Ma,

Wei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bu_2026_CVPR,
  author    = {Bu, Xizhou and Lyu, Jiexi and Sun, Fulei and Yang, Ruichen and Ma, Zhiqiang and Li, Wei},
  title     = {{LAOF}: Robust Latent Action Learning with Optical Flow Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27334--27344},
}
Seeing Depth Through Frequency and Motion: A Progressive Training Paradigm for Monocular Depth Estimation: Ke Li,

Bolin Song,

Hongbo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ke and Song, Bolin and Liu, Hongbo},
  title     = {Seeing Depth Through Frequency and Motion: A Progressive Training Paradigm for Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26845--26854},
}
See, Think, Act: Teaching Multimodal Agents to Effectively Interact with GUI by Identifying Toggles: Zongru Wu,

Rui Mao,

Zhiyuan Tian,

Pengzhou Cheng,

Tianjie Ju,

Zheng Wu,

Lingzhong Dong,

Haiyue Sheng,

Zhuosheng Zhang,

Gongshen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zongru and Mao, Rui and Tian, Zhiyuan and Cheng, Pengzhou and Ju, Tianjie and Wu, Zheng and Dong, Lingzhong and Sheng, Haiyue and Zhang, Zhuosheng and Liu, Gongshen},
  title     = {See, Think, Act: Teaching Multimodal Agents to Effectively Interact with {GUI} by Identifying Toggles},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27536--27546},
}
Reevaluating the Intra-Modal Misalignment Hypothesis in CLIP: Jonas Herzog,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herzog_2026_CVPR,
    author    = {Herzog, Jonas and Wang, Yue},
    title     = {Reevaluating the Intra-Modal Misalignment Hypothesis in {CLIP}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24310--24319}
}
Representation-Steered Incremental Adapter-Tuning for Class-Incremental Learning with Pre-Trained Models: Jiarui Zhao,

Libo Huang,

Xiangqi Li,

Zhulin An,

Chuanguang Yang,

Yu Wang,

Boyu Diao,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Jiarui and Huang, Libo and Li, Xiangqi and An, Zhulin and Yang, Chuanguang and Wang, Yu and Diao, Boyu and Xu, Yongjun},
    title     = {Representation-Steered Incremental Adapter-Tuning for Class-Incremental Learning with Pre-Trained Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18010--18020}
}
When Visualizing is the First Step to Reasoning: MIRA, a Benchmark for Visual Chain-of-Thought: Yiyang Zhou,

Haoqin Tu,

Zijun Wang,

Zeyu Wang,

Niklas Muennighoff,

Fan Nie,

Chaorui Deng,

Shen Yan,

Haoqi Fan,

Yejin Choi,

James Zou,

Cihang Xie,

Huaxiu Yao,

Qinghao Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Yiyang and Tu, Haoqin and Wang, Zijun and Wang, Zeyu and Muennighoff, Niklas and Nie, Fan and Deng, Chaorui and Yan, Shen and Fan, Haoqi and Choi, Yejin and Zou, James and Xie, Cihang and Yao, Huaxiu and Ye, Qinghao},
    title     = {When Visualizing is the First Step to Reasoning: {MIRA}, a Benchmark for Visual Chain-of-Thought},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26154--26164}
}
Towards Reliable Evaluation of Adversarial Robustness for Spiking Neural Networks: Jihang Wang,

Dongcheng Zhao,

Ruolin Chen,

Qian Zhang,

Yi Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Jihang and Zhao, Dongcheng and Chen, Ruolin and Zhang, Qian and Zeng, Yi},
    title     = {Towards Reliable Evaluation of Adversarial Robustness for Spiking Neural Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20691--20700}
}
MeshMosaic: Scaling Artist Mesh Generation via Local-to-Global Assembly: Rui Xu,

Tianyang Xue,

Qiujie Dong,

Le Wan,

Zhe Zhu,

Peng Li,

Zhiyang Dou,

Cheng Lin,

Shiqing Xin,

Yuan Liu,

Wenping Wang,

Taku Komura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Rui and Xue, Tianyang and Dong, Qiujie and Wan, Le and Zhu, Zhe and Li, Peng and Dou, Zhiyang and Lin, Cheng and Xin, Shiqing and Liu, Yuan and Wang, Wenping and Komura, Taku},
    title     = {{MeshMosaic}: Scaling Artist Mesh Generation via Local-to-Global Assembly},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20003--20013}
}
OS-Oracle: A Comprehensive Framework for Cross-Platform GUI Critic Models: Zhenyu Wu,

Jingjing Xie,

Zehao Li,

Bowen Yang,

Qiushi Sun,

Zhaoyang Liu,

Zhoumianze Liu,

Yu Qiao,

Xiangyu Yue,

Zun Wang,

Zichen Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Zhenyu and Xie, Jingjing and Li, Zehao and Yang, Bowen and Sun, Qiushi and Liu, Zhaoyang and Liu, Zhoumianze and Qiao, Yu and Yue, Xiangyu and Wang, Zun and Ding, Zichen},
    title     = {{OS-Oracle}: A Comprehensive Framework for Cross-Platform {GUI} Critic Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27514--27524}
}
Seele: A Unified Acceleration Framework for Real-Time Gaussian Splatting on Mobile Devices: He Zhu,

Xiaotong Huang,

Zihan Liu,

Weikai Lin,

Xiaohong Liu,

Zhezhi He,

Jingwen Leng,

Minyi Guo,

Yu Feng; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, He and Huang, Xiaotong and Liu, Zihan and Lin, Weikai and Liu, Xiaohong and He, Zhezhi and Leng, Jingwen and Guo, Minyi and Feng, Yu},
    title     = {{Seele}: A Unified Acceleration Framework for Real-Time {Gaussian} Splatting on Mobile Devices},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25979--25989}
}
Stitch-a-Demo: Creating Video Demonstrations from Multistep Descriptions: Chi Hsuan Wu,

Kumar Ashutosh,

Kristen Grauman; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Chi Hsuan and Ashutosh, Kumar and Grauman, Kristen},
    title     = {{Stitch-a-Demo}: Creating Video Demonstrations from Multistep Descriptions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23988--23999}
}
Gaussian Splatting-based Low-Rank Tensor Representation for Multi-Dimensional Image Recovery: Yiming Zeng,

Xi-Le Zhao,

Wei-Hao Wu,

Teng-Yu Ji,

Chao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Yiming and Zhao, Xi-Le and Wu, Wei-Hao and Ji, Teng-Yu and Wang, Chao},
    title     = {{Gaussian} Splatting-based Low-Rank Tensor Representation for Multi-Dimensional Image Recovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19360--19369}
}
Improving Adversarial Transferability with Local Perturbation Augmentation: Jian-Xun Mi,

Xuanhui Zhong,

Weisheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2026_CVPR,
    author    = {Mi, Jian-Xun and Zhong, Xuanhui and Li, Weisheng},
    title     = {Improving Adversarial Transferability with Local Perturbation Augmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20639--20649}
}
FlowFixer: Towards Detail-Preserving Subject-Driven Generation: Jinyoung Jun,

Won-Dong Jang,

Wenbin Ouyang,

Raghudeep Gadde,

Jungbeom Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jun_2026_CVPR,
    author    = {Jun, Jinyoung and Jang, Won-Dong and Ouyang, Wenbin and Gadde, Raghudeep and Lee, Jungbeom},
    title     = {{FlowFixer}: Towards Detail-Preserving Subject-Driven Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22049--22058}
}
Efficient Encoder-Free Fourier-based 3D Large Multimodal Model: Guofeng Mei,

Wei Lin,

Luigi Riz,

Yujiao Wu,

Yiming Wang,

Fabio Poiesi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2026_CVPR,
    author    = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Wang, Yiming and Poiesi, Fabio},
    title     = {Efficient Encoder-Free {Fourier}-based {3D} Large Multimodal Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23785--23794}
}
Scal3R: Scalable Test-Time Training for Large-Scale 3D Reconstruction: Tao Xie,

Peishan Yang,

Yudong Jin,

Yingfeng Cai,

Wei Yin,

Weiqiang Ren,

Qian Zhang,

Wei Hua,

Sida Peng,

Xiaoyang Guo,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Tao and Yang, Peishan and Jin, Yudong and Cai, Yingfeng and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Hua, Wei and Peng, Sida and Guo, Xiaoyang and Zhou, Xiaowei},
    title     = {{Scal3R}: Scalable Test-Time Training for Large-Scale {3D} Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21760--21771}
}
SparVAR: Exploring Sparsity in Visual AutoRegressive Modeling for Training-Free Acceleration: Zekun Li,

Ning Wang,

Tongxin Bai,

Changwang Mei,

Peisong Wang,

Shuang Qiu,

Jian Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Zekun and Wang, Ning and Bai, Tongxin and Mei, Changwang and Wang, Peisong and Qiu, Shuang and Cheng, Jian},
    title     = {{SparVAR}: Exploring Sparsity in Visual {AutoRegressive} Modeling for Training-Free Acceleration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19308--19318}
}
Head-wise Adaptive Rotary Positional Encoding for Fine-Grained Image Generation: Jiaye Li,

Baoyou Chen,

Hui Li,

Zilong Dong,

Jingdong Wang,

Siyu Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jiaye and Chen, Baoyou and Li, Hui and Dong, Zilong and Wang, Jingdong and Zhu, Siyu},
    title     = {Head-wise Adaptive Rotary Positional Encoding for Fine-Grained Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26338--26347}
}
Eulerian Gaussian Splatting using Hashed Probability Pyramids: Mia Gaia Polansky,

George Kopanas,

Stephan Garbin,

Todd Zickler,

Dor Verbin; [pdf] [supp]
[bibtex]
@InProceedings{Polansky_2026_CVPR,
    author    = {Polansky, Mia Gaia and Kopanas, George and Garbin, Stephan and Zickler, Todd and Verbin, Dor},
    title     = {{Eulerian} {Gaussian} Splatting using Hashed Probability Pyramids},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19045--19053}
}
Lyapunov Probes for Hallucination Detection in Large Foundation Models: Bozhi Luan,

Gen Li,

Yalan Qin,

Jifeng Guo,

Yun Zhou,

Faguo Wu,

Hongwei Zheng,

Wenjun Wu,

Zhaoxin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Luan_2026_CVPR,
    author    = {Luan, Bozhi and Li, Gen and Qin, Yalan and Guo, Jifeng and Zhou, Yun and Wu, Faguo and Zheng, Hongwei and Wu, Wenjun and Fan, Zhaoxin},
    title     = {{Lyapunov} Probes for Hallucination Detection in Large Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25336--25346}
}
Do VLMs Perceive or Recall? Probing Visual Perception vs. Memory with Classic Visual Illusions: Xiaoxiao Sun,

Mingyang Li,

Kun Yuan,

Min Woo Sun,

Mark Endo,

Shengguang Wu,

Changlin Li,

Yuhui Zhang,

Zeyu Wang,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Xiaoxiao and Li, Mingyang and Yuan, Kun and Sun, Min Woo and Endo, Mark and Wu, Shengguang and Li, Changlin and Zhang, Yuhui and Wang, Zeyu and Yeung-Levy, Serena},
    title     = {Do {VLMs} Perceive or Recall? Probing Visual Perception vs. Memory with Classic Visual Illusions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25861--25870}
}
R$^2$TUA: Reconstruction-residual Based Targeted and Untargeted Attack Against Text-Image Person Re-Identification: Yubo Wang,

Yan Lu,

Bin Liu,

Xulin Li,

Jixiang Niu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yubo and Lu, Yan and Liu, Bin and Li, Xulin and Niu, Jixiang},
    title     = {{R$^2$TUA}: Reconstruction-residual Based Targeted and Untargeted Attack Against Text-Image Person Re-Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22857--22866}
}
SemLT3D: Semantic-Guided Expert Distillation for Camera-only Long-Tailed 3D Object Detection: Hao Vo,

Khoa Vo,

Thinh Phan,

Ngo Xuan Cuong,

Gianfranco Doretto,

Hien Nguyen,

Anh Nguyen,

Ngan Le; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vo_2026_CVPR,
    author    = {Vo, Hao and Vo, Khoa and Phan, Thinh and Cuong, Ngo Xuan and Doretto, Gianfranco and Nguyen, Hien and Nguyen, Anh and Le, Ngan},
    title     = {{SemLT3D}: Semantic-Guided Expert Distillation for Camera-only Long-Tailed {3D} Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25788--25798}
}
Where, What, Why: Toward Explainable 3D-GS Watermarking: Mingshu Cai,

Jiajun Li,

Osamu Yoshie,

Yuya Ieiri,

Yixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Mingshu and Li, Jiajun and Yoshie, Osamu and Ieiri, Yuya and Li, Yixuan},
    title     = {Where, What, Why: Toward Explainable {3D-GS} Watermarking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20701--20710}
}
WEAVE: Unleashing and Benchmarking the In-context Interleaved Comprehension and Generation: Wei Chow,

Jiachun Pan,

Yongyuan Liang,

Mingze Zhou,

Xue Song,

Liyu Jia,

Saining Zhang,

Siliang Tang,

Juncheng Li,

Fengda Zhang,

Weijia Wu,

Hanwang Zhang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chow_2026_CVPR,
    author    = {Chow, Wei and Pan, Jiachun and Liang, Yongyuan and Zhou, Mingze and Song, Xue and Jia, Liyu and Zhang, Saining and Tang, Siliang and Li, Juncheng and Zhang, Fengda and Wu, Weijia and Zhang, Hanwang and Chua, Tat-Seng},
    title     = {{WEAVE}: Unleashing and Benchmarking the In-context Interleaved Comprehension and Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15343--15353}
}
Where Culture Fades: Revealing the Cultural Gap in Text-to-Image Generation: Chuancheng Shi,

Shangze Li,

Shiming Guo,

Simiao Xie,

Wenhua Wu,

Jingtong Dou,

Chao Wu,

Canran Xiao,

Cong Wang,

Zifeng Cheng,

Fei Shen,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Chuancheng and Li, Shangze and Guo, Shiming and Xie, Simiao and Wu, Wenhua and Dou, Jingtong and Wu, Chao and Xiao, Canran and Wang, Cong and Cheng, Zifeng and Shen, Fei and Chua, Tat-Seng},
    title     = {Where Culture Fades: Revealing the Cultural Gap in Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14702--14712}
}
SPE-MVS: Spatial Position Encoding Enhanced Multi-View Stereo with Monocular Depth Priors: Shaoqian Wang,

Jiadai Sun,

Bosen Hou,

Qiang Wang,

Bin Fan,

Bo Li,

Bin Lu,

Yuchao Dai; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Shaoqian and Sun, Jiadai and Hou, Bosen and Wang, Qiang and Fan, Bin and Li, Bo and Lu, Bin and Dai, Yuchao},
    title     = {{SPE-MVS}: Spatial Position Encoding Enhanced Multi-View Stereo with Monocular Depth Priors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14536--14545}
}
No Calibration, No Depth, No Problem: Cross-Sensor View Synthesis with 3D Consistency: Cho-Ying Wu,

Zixun Huang,

Xinyu Huang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Cho-Ying and Huang, Zixun and Huang, Xinyu and Ren, Liu},
    title     = {No Calibration, No Depth, No Problem: Cross-Sensor View Synthesis with {3D} Consistency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21836--21848}
}
Unified Spatiotemporal Token Compression for Video-LLMs at Ultra-Low Retention: Junhao Du,

Jialong Xue,

Anqi Li,

Jincheng Dai,

Guo Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
    author    = {Du, Junhao and Xue, Jialong and Li, Anqi and Dai, Jincheng and Lu, Guo},
    title     = {Unified Spatiotemporal Token Compression for {Video-LLMs} at Ultra-Low Retention},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17661--17671}
}
Hybrid Robust Collaborative Perception with LiDAR-4D Radar Fusion under Adverse Weather Conditions: Yuquan Yang,

Hui Zhang,

Wenyu Lu,

Ziyin Zhang,

Chuanming Zhang,

Xiaohua Xu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuquan and Zhang, Hui and Lu, Wenyu and Zhang, Ziyin and Zhang, Chuanming and Xu, Xiaohua},
    title     = {Hybrid Robust Collaborative Perception with {LiDAR}-{4D} Radar Fusion under Adverse Weather Conditions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24237--24247}
}
Guiding a Diffusion Model by Swapping Its Tokens: Weijia Zhang,

Yuehao Liu,

Shanyan Guan,

Wu Ran,

Yanhao Ge,

Wei Li,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Weijia and Liu, Yuehao and Guan, Shanyan and Ran, Wu and Ge, Yanhao and Li, Wei and Ma, Chao},
    title     = {Guiding a Diffusion Model by Swapping Its Tokens},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14263--14272}
}
CARD: A Multi-Modal Automotive Dataset for Dense 3D Reconstruction in Challenging Road Topography: Gasser Elazab,

Frank Neuhaus,

Tilman Koß,

Malte Splietker,

Aditya Date,

Michael Unterreiner,

Maximilian Jansen,

Olaf Hellwich; [pdf] [supp]
[bibtex]
@InProceedings{Elazab_2026_CVPR,
    author    = {Elazab, Gasser and Neuhaus, Frank and Ko{\ss}, Tilman and Splietker, Malte and Date, Aditya and Unterreiner, Michael and Jansen, Maximilian and Hellwich, Olaf},
    title     = {{CARD}: A Multi-Modal Automotive Dataset for Dense {3D} Reconstruction in Challenging Road Topography},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17820--17830}
}
Next-Scale Prediction: A Self-Supervised Approach for Real-World Image Denoising: Yiwen Shan,

Haiyu Zhao,

Peng Hu,

Xi Peng,

Yuanbiao Gou; [pdf] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
    author    = {Shan, Yiwen and Zhao, Haiyu and Hu, Peng and Peng, Xi and Gou, Yuanbiao},
    title     = {Next-Scale Prediction: A Self-Supervised Approach for Real-World Image Denoising},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22669--22678}
}
Mitigating Simplicity Bias in OOD Detection through Object Co-occurrence Analysis: Boyang Dai,

Chaoqi Chen,

Yizhou Yu; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
    author    = {Dai, Boyang and Chen, Chaoqi and Yu, Yizhou},
    title     = {Mitigating Simplicity Bias in {OOD} Detection through Object Co-occurrence Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20345--20355}
}
UIKA: Fast Universal Head Avatar from Pose-Free Images: Zijian Wu,

Boyao Zhou,

Liangxiao Hu,

Hongyu Liu,

Yuan Sun,

Xuan Wang,

Xun Cao,

Yujun Shen,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Zijian and Zhou, Boyao and Hu, Liangxiao and Liu, Hongyu and Sun, Yuan and Wang, Xuan and Cao, Xun and Shen, Yujun and Zhu, Hao},
    title     = {{UIKA}: Fast Universal Head Avatar from Pose-Free Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18216--18228}
}
AGFT: Alignment-Guided Fine-Tuning for Zero-Shot Adversarial Robustness of Vision-Language Models: Yubo Cui,

Xianchao Guan,

Zijun Xiong,

Zheng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
    author    = {Cui, Yubo and Guan, Xianchao and Xiong, Zijun and Zhang, Zheng},
    title     = {{AGFT}: Alignment-Guided Fine-Tuning for Zero-Shot Adversarial Robustness of Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22836--22846}
}
Coupling Liquid Time-Constant Encoders with Modern Hopfield Memory: Bishal Ranjan Swain,

Kyung Joo Cheoi,

Jaepil Ko; [pdf]
[bibtex]
@InProceedings{Swain_2026_CVPR,
    author    = {Swain, Bishal Ranjan and Cheoi, Kyung Joo and Ko, Jaepil},
    title     = {Coupling Liquid Time-Constant Encoders with Modern {Hopfield} Memory},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27409--27417}
}
OmniGen2: Towards Instruction-Aligned Multimodal Generation: Chenyuan Wu,

Jiahao Wang,

Pengfei Zheng,

Ruiran Yan,

Shitao Xiao,

Xin Luo,

Yueze Wang,

Wanli Li,

Xiyan Jiang,

Yexin Liu,

Junjie Zhou,

Ziyi Xia,

Ze Liu,

Chaofan Li,

Haoge Deng,

Kun Luo,

Bo Zhang,

Jiajun Zhang,

Dong Liu,

Defu Lian,

Xinlong Wang,

Zhongyuan Wang,

Tiejun Huang,

Zheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Chenyuan and Wang, Jiahao and Zheng, Pengfei and Yan, Ruiran and Xiao, Shitao and Luo, Xin and Wang, Yueze and Li, Wanli and Jiang, Xiyan and Liu, Yexin and Zhou, Junjie and Xia, Ziyi and Liu, Ze and Li, Chaofan and Deng, Haoge and Luo, Kun and Zhang, Bo and Zhang, Jiajun and Liu, Dong and Lian, Defu and Wang, Xinlong and Wang, Zhongyuan and Huang, Tiejun and Liu, Zheng},
    title     = {{OmniGen2}: Towards Instruction-Aligned Multimodal Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21964--21975}
}
MSJoE: Jointly Evolving MLLM and Sampler for Efficient Long-Form Video Understanding: Wenhui Tan,

Xiaoyi Yu,

Jiaze Li,

Yijing Chen,

Jianzhong Ju,

Zhenbo Luo,

Ruihua Song,

Jian Luan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Wenhui and Yu, Xiaoyi and Li, Jiaze and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Song, Ruihua and Luan, Jian},
    title     = {{MSJoE}: Jointly Evolving {MLLM} and Sampler for Efficient Long-Form Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19487--19496}
}
PQDT: Pseudo-Query Dual Transformer for Robust Point Cloud Restoration: Haoqing Wu,

Alexa Nawotki,

Jochen Garcke; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Haoqing and Nawotki, Alexa and Garcke, Jochen},
    title     = {{PQDT}: Pseudo-Query Dual Transformer for Robust Point Cloud Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24195--24205}
}
VisualAD: Language-Free Zero-Shot Anomaly Detection via Vision Transformer: Yanning Hou,

Peiyuan Li,

Zirui Liu,

Yitong Wang,

Yanran Ruan,

Jianfeng Qiu,

Ke Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
    author    = {Hou, Yanning and Li, Peiyuan and Liu, Zirui and Wang, Yitong and Ruan, Yanran and Qiu, Jianfeng and Xu, Ke},
    title     = {{VisualAD}: Language-Free Zero-Shot Anomaly Detection via Vision Transformer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21346--21356}
}
Shape-of-You: Fused Gromov-Wasserstein Optimal Transport for Semantic Correspondence in-the-Wild: Jiin Im,

Sisung Liu,

Je Hyeong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Im_2026_CVPR,
    author    = {Im, Jiin and Liu, Sisung and Hong, Je Hyeong},
    title     = {{Shape-of-You}: Fused {Gromov-Wasserstein} Optimal Transport for Semantic Correspondence in-the-Wild},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27730--27739}
}
TRIDENT: A Trimodal Cascade Generative Framework for Drug and RNA-Conditioned Cellular Morphology Synthesis: Rui Peng,

Ziru Liu,

Lingyuan Ye,

Yuxing Lu,

Boxin Shi,

Jinzhuo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Rui and Liu, Ziru and Ye, Lingyuan and Lu, Yuxing and Shi, Boxin and Wang, Jinzhuo},
    title     = {{TRIDENT}: A Trimodal Cascade Generative Framework for Drug and {RNA}-Conditioned Cellular Morphology Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26803--26812}
}
When Numbers Speak: Aligning Textual Numerals and Visual Instances in Text-to-Video Diffusion Models: Zhengyang Sun,

Yu Chen,

Xin Zhou,

Xiaofan Li,

Xiwu Chen,

Dingkang Liang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Zhengyang and Chen, Yu and Zhou, Xin and Li, Xiaofan and Chen, Xiwu and Liang, Dingkang and Bai, Xiang},
    title     = {When Numbers Speak: Aligning Textual Numerals and Visual Instances in Text-to-Video Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24268--24278}
}
Soft Modality-Guided Expert Specialization in MoE-VLMs: Zi-Hao Bo,

Yaqian Li,

Anzhou Hou,

Rinyoichi Takezoe,

Ertao Zhao,

Tianxiang Pan,

Jiale Yan,

Mo Guang,

Kaiwen Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bo_2026_CVPR,
    author    = {Bo, Zi-Hao and Li, Yaqian and Hou, Anzhou and Takezoe, Rinyoichi and Zhao, Ertao and Pan, Tianxiang and Yan, Jiale and Guang, Mo and Long, Kaiwen},
    title     = {Soft Modality-Guided Expert Specialization in {MoE-VLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24330--24340}
}
Critical Patch-Aware Sparse Prompting with Decoupled Training for Continual Learning on the Edge: Wonseon Lim,

Jaesung Lee,

Dae-Won Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
    author    = {Lim, Wonseon and Lee, Jaesung and Kim, Dae-Won},
    title     = {Critical Patch-Aware Sparse Prompting with Decoupled Training for Continual Learning on the Edge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17989--17998}
}
Streamlined Open-Vocabulary Human-Object Interaction Detection: Chang Sun,

Dongliang Liao,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Chang and Liao, Dongliang and Ding, Changxing},
    title     = {Streamlined Open-Vocabulary Human-Object Interaction Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20324--20333}
}
Multi-Prototype Compactness and Boundary-Aware Synthesis for Unsupervised Anomaly Detection: Kailun Liao,

Jianfeng Yang,

Tao Tao,

Wenfei Wu,

Jiaming Jiang,

Jinsheng Xiao; [pdf]
[bibtex]
@InProceedings{Liao_2026_CVPR,
    author    = {Liao, Kailun and Yang, Jianfeng and Tao, Tao and Wu, Wenfei and Jiang, Jiaming and Xiao, Jinsheng},
    title     = {Multi-Prototype Compactness and Boundary-Aware Synthesis for Unsupervised Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28524--28533}
}
DynBridge: Bridging Imagination and Control through Interaction Dynamics for Robot Manipulation: Alex Wang,

Zhiwei Dong,

Qicheng Bai,

Chenshi Zhang,

Yujie Yi,

Guang Dai,

Yong Liu,

Mengmeng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Alex and Dong, Zhiwei and Bai, Qicheng and Zhang, Chenshi and Yi, Yujie and Dai, Guang and Liu, Yong and Wang, Mengmeng},
    title     = {{DynBridge}: Bridging Imagination and Control through Interaction Dynamics for Robot Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22423--22432}
}
BulletTime: Decoupled Control of Time and Camera Pose for Video Generation: Yiming Wang,

Qihang Zhang,

Shengqu Cai,

Tong Wu,

Jan Ackermann,

Zhengfei Kuang,

Yang Zheng,

Frano Rajič,

Siyu Tang,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yiming and Zhang, Qihang and Cai, Shengqu and Wu, Tong and Ackermann, Jan and Kuang, Zhengfei and Zheng, Yang and Raji{\v{c}}, Frano and Tang, Siyu and Wetzstein, Gordon},
    title     = {{BulletTime}: Decoupled Control of Time and Camera Pose for Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18319--18330}
}
Human Interaction-Aware 3D Reconstruction from a Single Image: Gwanghyun Kim,

Junghun James Kim,

Suh Yoon Jeon,

Jason Park,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Gwanghyun and Kim, Junghun James and Jeon, Suh Yoon and Park, Jason and Chun, Se Young},
    title     = {Human Interaction-Aware {3D} Reconstruction from a Single Image},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21252--21261}
}
Adaptive Action Chunking at Inference-time for Vision-Language-Action Models: Yuanchang Liang,

Xiaobo Wang,

Kai Wang,

Shuo Wang,

Xiaojiang Peng,

Haoyu Chen,

David Kim Huat Chua,

Prahlad Vadakkepat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Yuanchang and Wang, Xiaobo and Wang, Kai and Wang, Shuo and Peng, Xiaojiang and Chen, Haoyu and Chua, David Kim Huat and Vadakkepat, Prahlad},
    title     = {Adaptive Action Chunking at Inference-time for Vision-Language-Action Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20802--20811}
}
MA-Bench: Towards Fine-grained Micro-Action Understanding: Kun Li,

Jihao Gu,

Fei Wang,

Zhiliang Wu,

Hehe Fan,

Dan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Kun and Gu, Jihao and Wang, Fei and Wu, Zhiliang and Fan, Hehe and Guo, Dan},
    title     = {{MA-Bench}: Towards Fine-grained Micro-Action Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20118--20128}
}
Beyond Layer-Wise Merging: Chain-of-Merging for Vision-Language Models: Xinyu Zhang,

Yuxuan Dong,

Lingling Zhang,

Chengyou Jia,

ZhuoHang Dang,

Yixing Yao,

Yaqiang Wu,

Basura Fernando,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xinyu and Dong, Yuxuan and Zhang, Lingling and Jia, Chengyou and Dang, ZhuoHang and Yao, Yixing and Wu, Yaqiang and Fernando, Basura and Liu, Jun},
    title     = {Beyond Layer-Wise Merging: Chain-of-Merging for Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24279--24289}
}
WeaveTime: Streaming from Earlier Frames into Emergent Memory in VideoLLMs: Yulin Zhang,

Cheng Shi,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yulin and Shi, Cheng and Yang, Sibei},
    title     = {{WeaveTime}: Streaming from Earlier Frames into Emergent Memory in {VideoLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16920--16932}
}
Self-Attention Driven Tensor Representation for High-Order Data Recovery: Zhi-Wei Shi,

Yu-Bang Zheng,

Heng-Chao Li; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Zhi-Wei and Zheng, Yu-Bang and Li, Heng-Chao},
    title     = {Self-Attention Driven Tensor Representation for High-Order Data Recovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26399--26408}
}
FLARE: A Failure-Aware Framework for Autonomous Correction and Recovery in Visual-Language Robotic Manipulation: Ganlong Zhao,

Zijia Tang,

Xingping Chen,

Zhanghui Kuang,

Ye Tian,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ganlong and Tang, Zijia and Chen, Xingping and Kuang, Zhanghui and Tian, Ye and Li, Guanbin},
  title     = {{FLARE}: A Failure-Aware Framework for Autonomous Correction and Recovery in Visual-Language Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22391--22401},
}
ReFAct: Empowering Multimodal Web Agents with Visual and Context Focusing: Rui Wu,

Shuo Zhang,

Xiaoxuan Tang,

Ruirui Zhang,

Yi Liu,

Tao Jiang,

Wenhao Xu,

Yong Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Rui and Zhang, Shuo and Tang, Xiaoxuan and Zhang, Ruirui and Liu, Yi and Jiang, Tao and Xu, Wenhao and Li, Yong},
  title     = {{ReFAct}: Empowering Multimodal Web Agents with Visual and Context Focusing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14990--14999},
}
Breaking the Scalability Limit of Multi-Projector Calibration with Embedded Cameras: Takumi Kawano,

Kohei Miura,

Daisuke Iwai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawano_2026_CVPR,
  author    = {Kawano, Takumi and Miura, Kohei and Iwai, Daisuke},
  title     = {Breaking the Scalability Limit of Multi-Projector Calibration with Embedded Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21573--21582},
}
Distilling Quasi-Conformal Mapping: A Generalizable and Efficient Solution for Wide-Angle Correction: Chengyang Liu,

Zixuan Lin,

Miaolin Han,

Michael K. Ng,

Huibin Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chengyang and Lin, Zixuan and Han, Miaolin and Ng, Michael K. and Li, Huibin},
  title     = {Distilling Quasi-Conformal Mapping: A Generalizable and Efficient Solution for Wide-Angle Correction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19686--19695},
}
UNICBench: UNIfied Counting Benchmark for MLLM: Chenggang Rong,

Tao Han,

Zhiyuan Zhao,

Yaowu Fan,

Jia Wan,

Song Guo,

Yuan Yuan,

Junyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rong_2026_CVPR,
  author    = {Rong, Chenggang and Han, Tao and Zhao, Zhiyuan and Fan, Yaowu and Wan, Jia and Guo, Song and Yuan, Yuan and Gao, Junyu},
  title     = {{UNICBench}: {UNIfied} Counting Benchmark for {MLLM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23730--23740},
}
CGL: Advancing Continual GUI Learning via Reinforcement Fine-Tuning: Zhenquan Yao,

Zitong Huang,

Yihan Zeng,

Jianhua Han,

Hang Xu,

Chun-Mei Feng,

Jianwei Ma,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhenquan and Huang, Zitong and Zeng, Yihan and Han, Jianhua and Xu, Hang and Feng, Chun-Mei and Ma, Jianwei and Zuo, Wangmeng},
  title     = {{CGL}: Advancing Continual {GUI} Learning via Reinforcement Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15212--15221},
}
DVGT: Driving Visual Geometry Transformer: Sicheng Zuo,

Zixun Xie,

Wenzhao Zheng,

Shaoqing Xu,

Fang Li,

Shengyin Jiang,

Long Chen,

Zhi-Xin Yang,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Sicheng and Xie, Zixun and Zheng, Wenzhao and Xu, Shaoqing and Li, Fang and Jiang, Shengyin and Chen, Long and Yang, Zhi-Xin and Lu, Jiwen},
  title     = {{DVGT}: Driving Visual Geometry Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14658--14668},
}
TextFM: Robust Semi-dense Feature Matching with Language Guidance: Zhihao Zheng,

Jinglun Feng,

Nirav Savaliya,

Zheng-Hang Yeh,

Bo Lang,

Mooi Choo Chuah; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zhihao and Feng, Jinglun and Savaliya, Nirav and Yeh, Zheng-Hang and Lang, Bo and Chuah, Mooi Choo},
  title     = {{TextFM}: Robust Semi-dense Feature Matching with Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16635--16644},
}
Towards Open Environments and Instructions: General Vision-Language Navigation via Fast-Slow Interactive Reasoning: Yang Li,

Aming Wu,

Zihao Zhang,

Yahong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Wu, Aming and Zhang, Zihao and Han, Yahong},
  title     = {Towards Open Environments and Instructions: General Vision-Language Navigation via Fast-Slow Interactive Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25184--25192},
}
ExpPortrait: Expressive Portrait Generation via Personalized Representation: Junyi Wang,

Yudong Guo,

Boyang Guo,

Shengming Yang,

Juyong Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Junyi and Guo, Yudong and Guo, Boyang and Yang, Shengming and Zhang, Juyong},
  title     = {{ExpPortrait}: Expressive Portrait Generation via Personalized Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18107--18117},
}
SpaceMind: Camera-Guided Modality Fusion for Spatial Reasoning in Vision-Language Models: Ruosen Zhao,

Zhikang Zhang,

Jialei Xu,

Jiahao Chang,

Dong Chen,

Lingyun Li,

Weijian Sun,

Zizhuang Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruosen and Zhang, Zhikang and Xu, Jialei and Chang, Jiahao and Chen, Dong and Li, Lingyun and Sun, Weijian and Wei, Zizhuang},
  title     = {{SpaceMind}: Camera-Guided Modality Fusion for Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16811--16822},
}
Concept-Guided Fine-Tuning: Steering ViTs away from Spurious Correlations to Improve Robustness: Yehonatan Elisha,

Oren Barkan,

Noam Koenigstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elisha_2026_CVPR,
  author    = {Elisha, Yehonatan and Barkan, Oren and Koenigstein, Noam},
  title     = {Concept-Guided Fine-Tuning: Steering {ViTs} away from Spurious Correlations to Improve Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17035--17045},
}
FlashVGGT: Efficient and Scalable Visual Geometry Transformers with Compressed Descriptor Attention: Zipeng Wang,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zipeng and Xu, Dan},
  title     = {{FlashVGGT}: Efficient and Scalable Visual Geometry Transformers with Compressed Descriptor Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21826--21835},
}
The Geometry of Robustness: Optimizing Loss Landscape Curvature and Feature Manifold Alignment for Robust Finetuning of Vision-Language Models: Shivang Chopra,

Shaunak Halbe,

Chengyue Huang,

Brisa Maneechotesuwan,

Zsolt Kira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chopra_2026_CVPR,
  author    = {Chopra, Shivang and Halbe, Shaunak and Huang, Chengyue and Maneechotesuwan, Brisa and Kira, Zsolt},
  title     = {The Geometry of Robustness: Optimizing Loss Landscape Curvature and Feature Manifold Alignment for Robust Finetuning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22133--22142},
}
PolarGuide-GSDR: 3D Gaussian Splatting Driven by Polarization Priors and Deferred Reflection for Real-World Reflective Scenes: Derui Shan,

Qian Qiao,

Hao Lu,

Tao Du,

Peng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Derui and Qiao, Qian and Lu, Hao and Du, Tao and Lu, Peng},
  title     = {{PolarGuide-GSDR}: {3D} {Gaussian} Splatting Driven by Polarization Priors and Deferred Reflection for Real-World Reflective Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26000--26009},
}
Parallelised Differentiable Straightest Geodesics for 3D Meshes: Hippolyte Verninas,

Caner Korkmaz,

Stefanos Zafeiriou,

Tolga Birdal,

Simone Foti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Verninas_2026_CVPR,
  author    = {Verninas, Hippolyte and Korkmaz, Caner and Zafeiriou, Stefanos and Birdal, Tolga and Foti, Simone},
  title     = {Parallelised Differentiable Straightest Geodesics for {3D} Meshes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14637--14647},
}
Fusion of Depth and Semantics for Probabilistic Floorplan Localization: Kecheng Ye,

Mao Chen,

Xiangkai Zhang,

Xu Yang; [pdf]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Kecheng and Chen, Mao and Zhang, Xiangkai and Yang, Xu},
  title     = {Fusion of Depth and Semantics for Probabilistic Floorplan Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19413--19422},
}
FFP-300K: Scaling First-Frame Propagation for Generalizable Video Editing: Xijie Huang,

Chengming Xu,

Donghao Luo,

Xiaobin Hu,

Peng Tang,

Xu Peng,

Jiangning Zhang,

Chengjie Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xijie and Xu, Chengming and Luo, Donghao and Hu, Xiaobin and Tang, Peng and Peng, Xu and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei},
  title     = {{FFP-300K}: Scaling First-Frame Propagation for Generalizable Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23172--23181},
}
OpenMarcie: Dataset for Multimodal Action Recognition in Industrial Environments: Hymalai Bello,

Lala Ray,

Joanna Sorysz,

Sungho Suh,

Paul Lukowicz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bello_2026_CVPR,
  author    = {Bello, Hymalai and Ray, Lala and Sorysz, Joanna and Suh, Sungho and Lukowicz, Paul},
  title     = {{OpenMarcie}: Dataset for Multimodal Action Recognition in Industrial Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20129--20138},
}
Avatar Forcing: Real-Time Interactive Head Avatar Generation for Natural Conversation: Taekyung Ki,

Sangwon Jang,

Jaehyeong Jo,

Jaehong Yoon,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ki_2026_CVPR,
  author    = {Ki, Taekyung and Jang, Sangwon and Jo, Jaehyeong and Yoon, Jaehong and Hwang, Sung Ju},
  title     = {Avatar Forcing: Real-Time Interactive Head Avatar Generation for Natural Conversation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18074--18084},
}
No Way To Steal My Face: Proactive Defense Against Identity-Preserving Personalized Generation: Lizhi Xiong,

Jun Li,

Ziqiang Li,

Weiwei Jiang,

Zhangjie Fu; [pdf] [supp]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Lizhi and Li, Jun and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie},
  title     = {No Way To Steal My Face: Proactive Defense Against Identity-Preserving Personalized Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20680--20690},
}
GaussianVision: Vision-Language Alignment from Compressed Image Representations using 2D Gaussian Splatting: Yasmine Omri,

Connor Ding,

Tsachy Weissman,

Thierry Tambe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Omri_2026_CVPR,
  author    = {Omri, Yasmine and Ding, Connor and Weissman, Tsachy and Tambe, Thierry},
  title     = {{GaussianVision}: Vision-Language Alignment from Compressed Image Representations using {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14926--14935},
}
Domain Sensitive Federated Learning with Fisher-Informed Pruning: Chenchen Lin,

Wenhao Yuan,

Zhengji Xu,

Xuehe Wang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Chenchen and Yuan, Wenhao and Xu, Zhengji and Wang, Xuehe},
  title     = {Domain Sensitive Federated Learning with {Fisher}-Informed Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17535--17544},
}
PAVAS: Physics-Aware Video-to-Audio Synthesis: Oh Hyun-Bin,

Yuhta Takida,

Toshimitsu Uesaka,

Tae-Hyun Oh,

Yuki Mitsufuji; [pdf] [supp]
[bibtex]
@InProceedings{Hyun-Bin_2026_CVPR,
  author    = {Hyun-Bin, Oh and Takida, Yuhta and Uesaka, Toshimitsu and Oh, Tae-Hyun and Mitsufuji, Yuki},
  title     = {{PAVAS}: Physics-Aware Video-to-Audio Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14481--14491},
}
From 3D Pose to Prose: Biomechanics-Grounded Vision-Language Coaching: Yuyang Ji,

Yixuan Shen,

Shengjie Zhu,

Yu Kong,

Feng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Yuyang and Shen, Yixuan and Zhu, Shengjie and Kong, Yu and Liu, Feng},
  title     = {From {3D} Pose to Prose: Biomechanics-Grounded Vision-Language Coaching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23506--23515},
}
VGGT-ohm: Jianyuan Wang,

Minghao Chen,

Shangzhan Zhang,

Nikita Karaev,

Johannes Schönberger,

Patrick Labatut,

Piotr Bojanowski,

David Novotny,

Andrea Vedaldi,

Christian Rupprecht; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jianyuan and Chen, Minghao and Zhang, Shangzhan and Karaev, Nikita and Sch{\"o}nberger, Johannes and Labatut, Patrick and Bojanowski, Piotr and Novotny, David and Vedaldi, Andrea and Rupprecht, Christian},
  title     = {{VGGT-ohm}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21486--21499},
}
3D-IDE: 3D Implicit Depth Emergent: Chushan Zhang,

Ruihan Lu,

Jinguang Tong,

Yikai Wang,

Hongdong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chushan and Lu, Ruihan and Tong, Jinguang and Wang, Yikai and Li, Hongdong},
  title     = {{3D-IDE}: {3D} Implicit Depth Emergent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23837--23847},
}
Sculpt4D: Generating 4D Shapes via Sparse-Attention Diffusion Transformers: Minghao Yin,

Wenbo Hu,

Jiale Xu,

Ying Shan,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Minghao and Hu, Wenbo and Xu, Jiale and Shan, Ying and Han, Kai},
  title     = {{Sculpt4D}: Generating {4D} Shapes via Sparse-Attention Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27188--27198},
}
Seeing Through the Noise: Improving Infrared Small Target Detection and Segmentation from Noise Suppression Perspective: Maoxun Yuan,

Duanni Meng,

Ziteng Xi,

Tianyi Zhao,

Shiji Zhao,

Yimian Dai,

Xingxing Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Maoxun and Meng, Duanni and Xi, Ziteng and Zhao, Tianyi and Zhao, Shiji and Dai, Yimian and Wei, Xingxing},
  title     = {Seeing Through the Noise: Improving Infrared Small Target Detection and Segmentation from Noise Suppression Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27783--27792},
}
SegCompass: Exploring Interpretable Alignment with Sparse Autoencoders for Enhanced Reasoning Segmentation: Zhenyu Lu,

Liupeng Li,

Jinpeng Wang,

Haoqian Kang,

Yan Feng,

Ke Chen,

Yaowei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zhenyu and Li, Liupeng and Wang, Jinpeng and Kang, Haoqian and Feng, Yan and Chen, Ke and Wang, Yaowei},
  title     = {{SegCompass}: Exploring Interpretable Alignment with Sparse Autoencoders for Enhanced Reasoning Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19132--19142},
}
AdapAction: Adaptive Target Action Backdoor Attack against GUI Agents: Baicheng Chen,

Mingda Zhang,

Min Zhang,

Haizhou Li,

Baoyuan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Baicheng and Zhang, Mingda and Zhang, Min and Li, Haizhou and Wu, Baoyuan},
  title     = {{AdapAction}: Adaptive Target Action Backdoor Attack against {GUI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27894--27905},
}
VideoFusion: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion: Linfeng Tang,

Yeda Wang,

Meiqi Gong,

Zizhuo Li,

Yuxin Deng,

Xunpeng Yi,

Chunyu Li,

Han Xu,

Hao Zhang,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Linfeng and Wang, Yeda and Gong, Meiqi and Li, Zizhuo and Deng, Yuxin and Yi, Xunpeng and Li, Chunyu and Xu, Han and Zhang, Hao and Ma, Jiayi},
  title     = {{VideoFusion}: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19559--19569},
}
EgoAVU: Egocentric Audio-Visual Understanding: Ashish Seth,

Xinhao Mei,

Changsheng Zhao,

Varun Nagaraja,

Ernie Chang,

Gregory P. Meyer,

Gael Le Lan,

Yunyang Xiong,

Vikas Chandra,

Yangyang Shi,

Dinesh Manocha,

Zhipeng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seth_2026_CVPR,
  author    = {Seth, Ashish and Mei, Xinhao and Zhao, Changsheng and Nagaraja, Varun and Chang, Ernie and Meyer, Gregory P. and Le Lan, Gael and Xiong, Yunyang and Chandra, Vikas and Shi, Yangyang and Manocha, Dinesh and Cai, Zhipeng},
  title     = {{EgoAVU}: Egocentric Audio-Visual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15805--15814},
}
CARD: Correlation Aware Restoration with Diffusion: Niki Nezakati,

Arnab Ghosh,

Amit Roy-Chowdhury,

Vishwanath Saragadam; [pdf] [supp]
[bibtex]
@InProceedings{Nezakati_2026_CVPR,
  author    = {Nezakati, Niki and Ghosh, Arnab and Roy-Chowdhury, Amit and Saragadam, Vishwanath},
  title     = {{CARD}: Correlation Aware Restoration with Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16531--16540},
}
3D Gaussian Splatting at Arbitrary Resolutions with Compact Proxy Anchors: Mingyun Jeong,

Seongro Yoon,

Francois Bremond,

Donghyeon Cho; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Mingyun and Yoon, Seongro and Bremond, Francois and Cho, Donghyeon},
  title     = {{3D} {Gaussian} Splatting at Arbitrary Resolutions with Compact Proxy Anchors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18991--19000},
}
TopoHR: Hierarchical Centerline Representation for Cyclic Topology Reasoning in Driving Scenes with Point-to-Instance Relations: Yifeng Bai,

Zhirong Chen,

Bo Song,

Erkang Cheng,

Haibin Ling; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Yifeng and Chen, Zhirong and Song, Bo and Cheng, Erkang and Ling, Haibin},
  title     = {{TopoHR}: Hierarchical Centerline Representation for Cyclic Topology Reasoning in Driving Scenes with Point-to-Instance Relations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18161--18170},
}
Depth Any Panoramas: A Foundation Model for Panoramic Depth Estimation: Xin Lin,

Meixi Song,

Dizhe Zhang,

Wenxuan Lu,

Haodong Li,

Bo Du,

Ming-Hsuan Yang,

Truong Nguyen,

Lu Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xin and Song, Meixi and Zhang, Dizhe and Lu, Wenxuan and Li, Haodong and Du, Bo and Yang, Ming-Hsuan and Nguyen, Truong and Qi, Lu},
  title     = {Depth Any Panoramas: A Foundation Model for Panoramic Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26835--26844},
}
Identity-Preserving Image-to-Video Generation via Reward-Guided Optimization: Liao Shen,

Wentao Jiang,

Yiran Zhu,

Jiahe Li,

Tiezheng Ge,

Zhiguo Cao,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Liao and Jiang, Wentao and Zhu, Yiran and Li, Jiahe and Ge, Tiezheng and Cao, Zhiguo and Zheng, Bo},
  title     = {Identity-Preserving Image-to-Video Generation via Reward-Guided Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27281--27290},
}
CREward: A Type-Specific Creativity Reward Model: Jiyeon Han,

Ali Mahdavi-Amiri,

Hao Zhang,

Haedong Jeong; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiyeon and Mahdavi-Amiri, Ali and Zhang, Hao and Jeong, Haedong},
  title     = {{CREward}: A Type-Specific Creativity Reward Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21932--21941},
}
TimeRipples: Accelerating vDiTs by Understanding the Spatio-Temporal Correlations in Latent Space: Wenxuan Mao,

Yulin Sun,

Aiyue Chen,

Jing Lin,

Yiwu Yao,

Yiming Gan,

Jieru Zhao,

Jingwen Leng,

Minyi Guo,

Yu Feng; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Wenxuan and Sun, Yulin and Chen, Aiyue and Lin, Jing and Yao, Yiwu and Gan, Yiming and Zhao, Jieru and Leng, Jingwen and Guo, Minyi and Feng, Yu},
  title     = {{TimeRipples}: Accelerating {vDiTs} by Understanding the Spatio-Temporal Correlations in Latent Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25688--25698},
}
GUI-CEval: A Hierarchical and Comprehensive Chinese Benchmark for Mobile GUI Agents: Yang Li,

Yuchen Liu,

Haoyu Lu,

Zhiqiang Xia,

Hongzhen Wang,

Kaiyang Han,

Changpeng Yang,

Jinyang Wu,

Jiaming Xu,

Runyu Shi,

Ying Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Liu, Yuchen and Lu, Haoyu and Xia, Zhiqiang and Wang, Hongzhen and Han, Kaiyang and Yang, Changpeng and Wu, Jinyang and Xu, Jiaming and Shi, Runyu and Huang, Ying},
  title     = {{GUI-CEval}: A Hierarchical and Comprehensive {Chinese} Benchmark for Mobile {GUI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20303--20312},
}
TAS-LoRA: Transformer Architecture Search with Mixture-of-LoRA Experts: Jeimin Jeon,

Hyunju Lee,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Jeimin and Lee, Hyunju and Ham, Bumsub},
  title     = {{TAS-LoRA}: Transformer Architecture Search with Mixture-of-{LoRA} Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20243--20252},
}
Beyond Reassembly: Fractured Object Recovery with Missing Parts: Qun-Ce Xu,

Jiahui Li,

Yan-Pei Cao,

Weihao Cheng,

Tai-Jiang Mu,

Ying Shan,

Chuan Li,

Da Chen,

Yong-Liang Yang,

Shi-min Hu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qun-Ce and Li, Jiahui and Cao, Yan-Pei and Cheng, Weihao and Mu, Tai-Jiang and Shan, Ying and Li, Chuan and Chen, Da and Yang, Yong-Liang and Hu, Shi-min},
  title     = {Beyond Reassembly: Fractured Object Recovery with Missing Parts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20488--20498},
}
Frame2Freq: Spectral Adapters for Fine-Grained Video Understanding: Thinesh Thiyakesan Ponbagavathi,

Constantin Seibold,

Alina Roitberg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ponbagavathi_2026_CVPR,
  author    = {Ponbagavathi, Thinesh Thiyakesan and Seibold, Constantin and Roitberg, Alina},
  title     = {{Frame2Freq}: Spectral Adapters for Fine-Grained Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24073--24083},
}
UI-Lens: Assessing General MLLMs' Potential to Automate UI Display Quality Assurance: Wei Xiang,

Yexinrui Wu,

Xinli Chen,

Xinran Li,

Shi Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Wei and Wu, Yexinrui and Chen, Xinli and Li, Xinran and Chen, Shi},
  title     = {{UI-Lens}: Assessing General {MLLMs}' Potential to Automate {UI} Display Quality Assurance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25882--25892},
}
DA-VAE: Plug-in Latent Compression for Diffusion via Detail Alignment: Xin Cai,

Zhiyuan You,

Zhoutong Zhang,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Xin and You, Zhiyuan and Zhang, Zhoutong and Xue, Tianfan},
  title     = {{DA-VAE}: Plug-in Latent Compression for Diffusion via Detail Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18703--18713},
}
InternVideo-Next: Towards World-Understanding Video Models: Chenting Wang,

Yuhan Zhu,

Yicheng Xu,

Jiange Yang,

Ziang Yan,

Yali Wang,

Yi Wang,

Limin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenting and Zhu, Yuhan and Xu, Yicheng and Yang, Jiange and Yan, Ziang and Wang, Yali and Wang, Yi and Wang, Limin},
  title     = {{InternVideo-Next}: Towards World-Understanding Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16966--16976},
}
VMD-FACT: A New Video Dataset and MLLM-based method for Detecting Realistic AI-Generated Video Misinformation: Yongkang Zhang,

Dongyu She,

Baiyu Ji,

Qichuan Geng,

Zhong Zhou,

Yan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yongkang and She, Dongyu and Ji, Baiyu and Geng, Qichuan and Zhou, Zhong and Wang, Yan},
  title     = {{VMD-FACT}: A New Video Dataset and {MLLM}-based method for Detecting Realistic {AI}-Generated Video Misinformation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21284--21294},
}
ReMatch: Boosting Representation through Matching for Multimodal Retrieval: Qianying Liu,

Xiao Liang,

Zhiqiang Zhang,

Yibo Chen,

Xu Tang,

Zhongfei Qing,

Fengfan Zhou,

Yao Hu,

Paul Henderson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Qianying and Liang, Xiao and Zhang, Zhiqiang and Chen, Yibo and Tang, Xu and Qing, Zhongfei and Zhou, Fengfan and Hu, Yao and Henderson, Paul},
  title     = {{ReMatch}: Boosting Representation through Matching for Multimodal Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16823--16833},
}
Same Attention, Different Truths: Put Logit-Lens over Visual Attention to Detect and Mitigate LVLM Object Hallucination: Zichuan Wang,

Songlin Yang,

Bo Peng,

Zhenchen Tang,

Yang Li,

Beibei Dong,

Jing Dong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zichuan and Yang, Songlin and Peng, Bo and Tang, Zhenchen and Li, Yang and Dong, Beibei and Dong, Jing},
  title     = {Same Attention, Different Truths: Put Logit-Lens over Visual Attention to Detect and Mitigate {LVLM} Object Hallucination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25315--25325},
}
POCA: Pareto-Optimal Curriculum Alignment for Visual Text Generation: Yaohou Fan,

Qingzhong Wang,

Yongsong Huang,

Junyi Liu,

Tomo Miyazaki,

Shinichiro Omachi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Yaohou and Wang, Qingzhong and Huang, Yongsong and Liu, Junyi and Miyazaki, Tomo and Omachi, Shinichiro},
  title     = {{POCA}: {Pareto}-Optimal Curriculum Alignment for Visual Text Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21891--21900},
}
Solvability of the Viewing Graph Under the Affine Camera Model: Gabriele Pedroni,

Rakshith Madhavan,

Federica Arrigoni; [pdf] [supp]
[bibtex]
@InProceedings{Pedroni_2026_CVPR,
  author    = {Pedroni, Gabriele and Madhavan, Rakshith and Arrigoni, Federica},
  title     = {Solvability of the Viewing Graph Under the Affine Camera Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26731--26740},
}
Seeing Beyond 8bits: Subjective and Objective Quality Assessment of HDR-UGC Videos: Shreshth Saini,

Bowen Chen,

Yilin Wang,

Neil Birkbeck,

Balu Adsumilli,

Alan C. Bovik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saini_2026_CVPR,
  author    = {Saini, Shreshth and Chen, Bowen and Wang, Yilin and Birkbeck, Neil and Adsumilli, Balu and Bovik, Alan C.},
  title     = {Seeing Beyond 8bits: Subjective and Objective Quality Assessment of {HDR-UGC} Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15538--15549},
}
Seeing Both Sides: Towards Bidirectional Semantic Alignment for Open-Vocabulary Camouflaged Object Segmentation: Guohui Zhang,

Fuming Sun,

Yu Zhao,

Yuqiu Kong,

Jing Sun,

Fasheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohui and Sun, Fuming and Zhao, Yu and Kong, Yuqiu and Sun, Jing and Wang, Fasheng},
  title     = {Seeing Both Sides: Towards Bidirectional Semantic Alignment for Open-Vocabulary Camouflaged Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27655--27664},
}
Geo2: Geometry-Guided Cross-view Geo-Localization and Image Synthesis: Yancheng Zhang,

Xiaohan Zhang,

Guangyu Sun,

Zonglin Lyu,

Safwan Wshah,

Chen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yancheng and Zhang, Xiaohan and Sun, Guangyu and Lyu, Zonglin and Wshah, Safwan and Chen, Chen},
  title     = {Geo2: Geometry-Guided Cross-view Geo-Localization and Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19432--19442},
}
Efficiency Follows Global-Local Decoupling: Zhenyu Yang,

Gensheng Pei,

Tao Chen,

Yichao Zhou,

Tianfei Zhou,

Yazhou Yao,

Fumin Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhenyu and Pei, Gensheng and Chen, Tao and Zhou, Yichao and Zhou, Tianfei and Yao, Yazhou and Shen, Fumin},
  title     = {Efficiency Follows Global-Local Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25524--25535},
}
SMRABooth: Subject and Motion Representation Alignment for Customized Video Generation: Xuancheng Xu,

Yaning Li,

Sisi You,

Bing-Kun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Xuancheng and Li, Yaning and You, Sisi and Bao, Bing-Kun},
  title     = {{SMRABooth}: Subject and Motion Representation Alignment for Customized Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16130--16141},
}
PowerCLIP: Powerset Alignment for Contrastive Pre-Training: Masaki Kawamura,

Nakamasa Inoue,

Rintaro Yanagi,

Hirokatsu Kataoka,

Rio Yokota; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawamura_2026_CVPR,
  author    = {Kawamura, Masaki and Inoue, Nakamasa and Yanagi, Rintaro and Kataoka, Hirokatsu and Yokota, Rio},
  title     = {{PowerCLIP}: Powerset Alignment for Contrastive Pre-Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22111--22122},
}
Expanding mmWave Datasets for Human Pose Estimation with Unlabeled Data and LiDAR Datasets: Zhuoxuan Peng,

Boan Zhu,

Xingjian Zhang,

Wenying Li,

S.-H. Gary Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Zhuoxuan and Zhu, Boan and Zhang, Xingjian and Li, Wenying and Chan, S.-H. Gary},
  title     = {Expanding {mmWave} Datasets for Human Pose Estimation with Unlabeled Data and {LiDAR} Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21221--21230},
}
Spectral Super-Resolution via Adversarial Unfolding and Data-Driven Spectrum Regularization: From Multispectral Satellite Data to NASA Hyperspectral Image: Si-Sheng Young,

Chia-Hsiang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Young_2026_CVPR,
  author    = {Young, Si-Sheng and Lin, Chia-Hsiang},
  title     = {Spectral Super-Resolution via Adversarial Unfolding and Data-Driven Spectrum Regularization: From Multispectral Satellite Data to {NASA} Hyperspectral Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27827--27837},
}
Copy-Transform-Paste: Zero-Shot Object-Object Alignment Guided by Vision-Language and Geometric Constraints: Rotem Gatenyo,

Ohad Fried; [pdf] [supp]
[bibtex]
@InProceedings{Gatenyo_2026_CVPR,
  author    = {Gatenyo, Rotem and Fried, Ohad},
  title     = {Copy-Transform-Paste: Zero-Shot Object-Object Alignment Guided by Vision-Language and Geometric Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14936--14945},
}
Generative Adversarial Perturbations with Cross-paradigm Transferability on Localized Crowd Counting: Alabi Mehzabin Anisha,

Guangjing Wang,

Sriram Chellappan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anisha_2026_CVPR,
  author    = {Anisha, Alabi Mehzabin and Wang, Guangjing and Chellappan, Sriram},
  title     = {Generative Adversarial Perturbations with Cross-paradigm Transferability on Localized Crowd Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20629--20638},
}
ProxyFL: A Proxy-Guided Framework for Federated Semi-Supervised Learning: Duowen Chen,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Duowen and Wang, Yan},
  title     = {{ProxyFL}: A Proxy-Guided Framework for Federated Semi-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17525--17534},
}
FedRG: Unleashing the Representation Geometry for Federated Learning with Noisy Clients: Tian Wen,

Zhiqin Yang,

Yonggang Zhang,

Xuefeng Jiang,

Hao Peng,

Yuwei Wang,

Bo Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Tian and Yang, Zhiqin and Zhang, Yonggang and Jiang, Xuefeng and Peng, Hao and Wang, Yuwei and Han, Bo},
  title     = {{FedRG}: Unleashing the Representation Geometry for Federated Learning with Noisy Clients},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24545--24556},
}
MUFASA: A Multi-Layer Framework for Slot Attention: Sebastian Bock,

Leonie Schüßler,

Krishnakant Singh,

Simone Schaub-Meyer,

Stefan Roth; [pdf] [supp]
[bibtex]
@InProceedings{Bock_2026_CVPR,
  author    = {Bock, Sebastian and Sch{\"u}{\ss}ler, Leonie and Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan},
  title     = {{MUFASA}: A Multi-Layer Framework for Slot Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27750--27760},
}
RAMEN: Resolution-Adjustable Multimodal Encoder for Earth Observation: Nicolas Houdré,

Diego Marcos,

Hugo Riffaud de Turckheim,

Dino Ienco,

Laurent Wendling,

Camille Kurtz,

Sylvain Lobry; [pdf] [supp]
[bibtex]
@InProceedings{Houdre_2026_CVPR,
  author    = {Houdr{\'e}, Nicolas and Marcos, Diego and de Turckheim, Hugo Riffaud and Ienco, Dino and Wendling, Laurent and Kurtz, Camille and Lobry, Sylvain},
  title     = {{RAMEN}: Resolution-Adjustable Multimodal Encoder for {Earth} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27838--27848},
}
Designing to Forget: Deep Semi-parametric Models for Unlearning: Amber Yijia Zheng,

Yu-Shan Tai,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Amber Yijia and Tai, Yu-Shan and Yeh, Raymond A.},
  title     = {Designing to Forget: Deep Semi-parametric Models for Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17409--17419},
}
Forecast the Principal, Stabilize the Residual: Subspace-Aware Feature Caching for Diffusion Transformers: Guantao Chen,

Shikang Zheng,

Yuqi Lin,

Linfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Guantao and Zheng, Shikang and Lin, Yuqi and Zhang, Linfeng},
  title     = {Forecast the Principal, Stabilize the Residual: Subspace-Aware Feature Caching for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23632--23641},
}
VENI: Variational Encoder for Natural Illumination: Paul Walker,

James A. D. Gardner,

Andreea Ardelean,

William A. P. Smith,

Bernhard Egger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walker_2026_CVPR,
  author    = {Walker, Paul and Gardner, James A. D. and Ardelean, Andreea and Smith, William A. P. and Egger, Bernhard},
  title     = {{VENI}: Variational Encoder for Natural Illumination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16248--16257},
}
MVGGT: Multimodal Visual Geometry Grounded Transformer for Multiview 3D Referring Expression Segmentation: Changli Wu,

Haodong Wang,

Jiayi Ji,

Yutian Yao,

Chunsai Du,

Jihua Kang,

Yanwei Fu,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Changli and Wang, Haodong and Ji, Jiayi and Yao, Yutian and Du, Chunsai and Kang, Jihua and Fu, Yanwei and Cao, Liujuan},
  title     = {{MVGGT}: Multimodal Visual Geometry Grounded Transformer for Multiview {3D} Referring Expression Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16800--16810},
}
Next-Scale Autoregressive Models for Text-to-Motion Generation: Zhiwei Zheng,

Shibo Jin,

Lingjie Liu,

Mingmin Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zhiwei and Jin, Shibo and Liu, Lingjie and Zhao, Mingmin},
  title     = {Next-Scale Autoregressive Models for Text-to-Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16376--16386},
}
PointWorld: Scaling 3D World Models for In-The-Wild Robotic Manipulation: Wenlong Huang,

Yu-Wei Chao,

Arsalan Mousavian,

Ming-Yu Liu,

Dieter Fox,

Kaichun Mo,

Li Fei-Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wenlong and Chao, Yu-Wei and Mousavian, Arsalan and Liu, Ming-Yu and Fox, Dieter and Mo, Kaichun and Fei-Fei, Li},
  title     = {{PointWorld}: Scaling {3D} World Models for In-The-Wild Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20765--20779},
}
TAG-MoE: Task-Aware Gating for Unified Generative Mixture-of-Experts: Yu Xu,

Hongbin Yan,

Juan Cao,

Yiji Cheng,

Tiankai Hang,

Runze He,

Zijin Yin,

Shiyi Zhang,

Yuxin Zhang,

Jintao Li,

Chunyu Wang,

Qinglin Lu,

Tong-Yee Lee,

Fan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yu and Yan, Hongbin and Cao, Juan and Cheng, Yiji and Hang, Tiankai and He, Runze and Yin, Zijin and Zhang, Shiyi and Zhang, Yuxin and Li, Jintao and Wang, Chunyu and Lu, Qinglin and Lee, Tong-Yee and Tang, Fan},
  title     = {{TAG-MoE}: Task-Aware Gating for Unified Generative Mixture-of-Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27270--27280},
}
MDCS-MoAME: Multi-directional Composite Scanning with Mixture of Attention and Mamba Experts for Cancer Survival Prediction: Linjie Qu,

Jin Xiao,

Xiangrong Liu,

Changming Sun,

Hui Cui,

Yuqi Fang,

Ran Su,

Qiangguo Jin,

Leyi Wei; [pdf]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Linjie and Xiao, Jin and Liu, Xiangrong and Sun, Changming and Cui, Hui and Fang, Yuqi and Su, Ran and Jin, Qiangguo and Wei, Leyi},
  title     = {{MDCS-MoAME}: Multi-directional Composite Scanning with Mixture of Attention and {Mamba} Experts for Cancer Survival Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14461--14470},
}
GeoGuide: Hierarchical Geometric Guidance for Open-Vocabulary 3D Semantic Segmentation: Xujing Tao,

Chuxin Wang,

Yubo Ai,

Zhixin Cheng,

Zhuoyuan Li,

Liangsheng Liu,

Yujia Chen,

Xinjun Li,

Qiao Li,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Xujing and Wang, Chuxin and Ai, Yubo and Cheng, Zhixin and Li, Zhuoyuan and Liu, Liangsheng and Chen, Yujia and Li, Xinjun and Li, Qiao and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{GeoGuide}: Hierarchical Geometric Guidance for Open-Vocabulary {3D} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26855--26866},
}
Volumetric Functional Maps: Filippo Maggioli,

Simone Melzi,

Marco Livesu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maggioli_2026_CVPR,
  author    = {Maggioli, Filippo and Melzi, Simone and Livesu, Marco},
  title     = {Volumetric Functional Maps},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20443--20454},
}
Focus-to-Perceive Representation Learning: A Cognition-Inspired Hierarchical Framework for Endoscopic Video Analysis: Yuan Zhang,

Sihao Dou,

Kai Hu,

Shuhua Deng,

Chunhong Cao,

Fen Xiao,

Xieping Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuan and Dou, Sihao and Hu, Kai and Deng, Shuhua and Cao, Chunhong and Xiao, Fen and Gao, Xieping},
  title     = {Focus-to-Perceive Representation Learning: A Cognition-Inspired Hierarchical Framework for Endoscopic Video Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28264--28274},
}
DAGE: Dual-Stream Architecture for Efficient and Fine-Grained Geometry Estimation: Tuan Duc Ngo,

Jiahui Huang,

Seoung Wug Oh,

Kevin Blackburn-Matzen,

Evangelos Kalogerakis,

Chuang Gan,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ngo_2026_CVPR,
  author    = {Ngo, Tuan Duc and Huang, Jiahui and Oh, Seoung Wug and Blackburn-Matzen, Kevin and Kalogerakis, Evangelos and Gan, Chuang and Lee, Joon-Young},
  title     = {{DAGE}: Dual-Stream Architecture for Efficient and Fine-Grained Geometry Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21701--21712},
}
ORIC: Benchmarking Object Recognition under Contextual Incongruity in Large Vision-Language Models: Zhaoyang Li,

Zhan Ling,

Yuchen Zhou,

Litian Gong,

Erdem Biyik,

Hao Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhaoyang and Ling, Zhan and Zhou, Yuchen and Gong, Litian and Biyik, Erdem and Su, Hao},
  title     = {{ORIC}: Benchmarking Object Recognition under Contextual Incongruity in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23673--23684},
}
QD-PCQA: Quality-Aware Domain Adaptation for Point Cloud Quality Assessment: Guohua Zhang,

Jian Jin,

Meiqin Liu,

Chao Yao,

Weisi Lin; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohua and Jin, Jian and Liu, Meiqin and Yao, Chao and Lin, Weisi},
  title     = {{QD-PCQA}: Quality-Aware Domain Adaptation for Point Cloud Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17143--17152},
}
Gradient Knows Best: Mixed-Precision Quantization via Gradient-Guided Bit Allocation for Super-Resolution: Jun Young Kim,

Joo Hyeon Jeon,

Sangyeon Ahn,

Yoonseo Park,

Yong Seok Oh,

Bogyeong Kim,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jun Young and Jeon, Joo Hyeon and Ahn, Sangyeon and Park, Yoonseo and Oh, Yong Seok and Kim, Bogyeong and Cho, Sung In},
  title     = {Gradient Knows Best: Mixed-Precision Quantization via Gradient-Guided Bit Allocation for Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16355--16364},
}
Pixels Don't Lie (But Your Detector Might): Bootstrapping MLLM-as-a-Judge for Trustworthy Deepfake Detection and Reasoning Supervision: Kartik Kuckreja,

Parul Gupta,

Muhammad Haris Khan,

Abhinav Dhall; [pdf] [supp]
[bibtex]
@InProceedings{Kuckreja_2026_CVPR,
  author    = {Kuckreja, Kartik and Gupta, Parul and Khan, Muhammad Haris and Dhall, Abhinav},
  title     = {Pixels Don't Lie (But Your Detector Might): Bootstrapping {MLLM-as-a-Judge} for Trustworthy Deepfake Detection and Reasoning Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25871--25881},
}
VS-Bench: Evaluating VLMs for Strategic Abilities in Multi-Agent Environments: Zelai Xu,

Zhexuan Xu,

Xiangmin Yi,

Huining Yuan,

Mo Guang,

Kaiwen Long,

Xinlei Chen,

Yi Wu,

Chao Yu,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zelai and Xu, Zhexuan and Yi, Xiangmin and Yuan, Huining and Guang, Mo and Long, Kaiwen and Chen, Xinlei and Wu, Yi and Yu, Chao and Wang, Yu},
  title     = {{VS-Bench}: Evaluating {VLMs} for Strategic Abilities in Multi-Agent Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21561--21572},
}
Missing No More: Dictionary-Guided Cross-Modal Image Fusion under Missing Infrared: Yafei Zhang,

Meng Ma,

Huafeng Li,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yafei and Ma, Meng and Li, Huafeng and Liu, Yu},
  title     = {Missing No More: Dictionary-Guided Cross-Modal Image Fusion under Missing Infrared},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19549--19558},
}
Hyperbolic Defect Feature Synthesis for Few-Shot Defect Classification: Huimin Li,

Boxuan Hu,

Yulin Zhang,

Xiuzhuang Zhou,

Junlin Hu; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Huimin and Hu, Boxuan and Zhang, Yulin and Zhou, Xiuzhuang and Hu, Junlin},
  title     = {Hyperbolic Defect Feature Synthesis for Few-Shot Defect Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19602--19612},
}
HVG-3D: Bridging Real and Simulation Domains for 3D-Conditional Hand-Object Interaction Video Synthesis: Mingjin Chen,

Junhao Chen,

Zhaoxin Fan,

Yujian Lee,

Zichen Dang,

Lili Wang,

Yawen Cui,

Lap-Pui Chau,

Yi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Mingjin and Chen, Junhao and Fan, Zhaoxin and Lee, Yujian and Dang, Zichen and Wang, Lili and Cui, Yawen and Chau, Lap-Pui and Wang, Yi},
  title     = {{HVG-3D}: Bridging Real and Simulation Domains for {3D-Conditional} Hand-Object Interaction Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15986--15997},
}
Qwen-Image-Layered: Towards Inherent Editability via Layer Decomposition: Shengming Yin,

Zekai Zhang,

Zecheng Tang,

Kaiyuan Gao,

Xiao Xu,

Kun Yan,

Jiahao Li,

Yilei Chen,

Yuxiang Chen,

Heung-Yeung Shum,

Lionel M. Ni,

Junyang Lin,

Chenfei Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Shengming and Zhang, Zekai and Tang, Zecheng and Gao, Kaiyuan and Xu, Xiao and Yan, Kun and Li, Jiahao and Chen, Yilei and Chen, Yuxiang and Shum, Heung-Yeung and Ni, Lionel M. and Lin, Junyang and Wu, Chenfei},
  title     = {{Qwen-Image-Layered}: Towards Inherent Editability via Layer Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16196--16205},
}
Dictionary-Aligned Concept Control for Safeguarding Multimodal LLMs: Jinqi Luo,

Jinyu Yang,

Tal Neiman,

Lei Fan,

Bing Yin,

Son Tran,

Mubarak Shah,

René Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Jinqi and Yang, Jinyu and Neiman, Tal and Fan, Lei and Yin, Bing and Tran, Son and Shah, Mubarak and Vidal, Ren{\'e}},
  title     = {Dictionary-Aligned Concept Control for Safeguarding Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15815--15828},
}
Bridging Facial Understanding and Animation via Language Models: Luchuan Song,

Pinxin Liu,

Haiyang Liu,

Zhenchao Jin,

Yolo Yunlong Tang,

Zichong Xu,

Susan Liang,

Jing Bi,

Jason J Corso,

Chenliang Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Luchuan and Liu, Pinxin and Liu, Haiyang and Jin, Zhenchao and Tang, Yolo Yunlong and Xu, Zichong and Liang, Susan and Bi, Jing and Corso, Jason J and Xu, Chenliang},
  title     = {Bridging Facial Understanding and Animation via Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17557--17567},
}
ProactiveMobile: A Comprehensive Benchmark for Boosting Proactive Intelligence On Mobile Devices: Dezhi Kong,

Zhengzhao Feng,

Qiliang Liang,

Hao Wang,

Haofei Sun,

Changpeng Yang,

Yang Li,

Peng Zhou,

Shuai Nie,

Hongzhen Wang,

Linfeng Zhou,

Hao Jia,

Jiaming Xu,

Runyu Shi,

Ying Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Dezhi and Feng, Zhengzhao and Liang, Qiliang and Wang, Hao and Sun, Haofei and Yang, Changpeng and Li, Yang and Zhou, Peng and Nie, Shuai and Wang, Hongzhen and Zhou, Linfeng and Jia, Hao and Xu, Jiaming and Shi, Runyu and Huang, Ying},
  title     = {{ProactiveMobile}: A Comprehensive Benchmark for Boosting Proactive Intelligence On Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27503--27513},
}
Thinking-while-Generating: Interleaving Textual Reasoning throughout Visual Generation: Ziyu Guo,

Renrui Zhang,

Hongyu Li,

Manyuan Zhang,

Xinyan Chen,

Sifan Wang,

Yan Feng,

Peng Pei,

Pheng-Ann Heng; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyu and Zhang, Renrui and Li, Hongyu and Zhang, Manyuan and Chen, Xinyan and Wang, Sifan and Feng, Yan and Pei, Peng and Heng, Pheng-Ann},
  title     = {Thinking-while-Generating: Interleaving Textual Reasoning throughout Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26295--26305},
}
RAG-TP: A General Framework for Vehicle Trajectory Prediction via Retrieval-Augmented Generation: Ziyi Wang,

Yang Zhang,

Guijian Tang,

Chao Zhang,

Shibo Zhang,

Xueqiong Li,

Shaowu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyi and Zhang, Yang and Tang, Guijian and Zhang, Chao and Zhang, Shibo and Li, Xueqiong and Yang, Shaowu},
  title     = {{RAG-TP}: A General Framework for Vehicle Trajectory Prediction via Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24865--24874},
}
Beyond Appearance: Camouflaged Object Detection via Geometric Structure: Jinyu Han,

Changguang Wu,

Fuming Sun,

Jinhui Tang; [pdf]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jinyu and Wu, Changguang and Sun, Fuming and Tang, Jinhui},
  title     = {Beyond Appearance: Camouflaged Object Detection via Geometric Structure},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25830--25840},
}
ShapeR: Robust Conditional 3D Shape Generation from Casual Captures: Yawar Siddiqui,

Duncan Frost,

Samir Aroudj,

Armen Avetisyan,

Henry Howard-Jenkins,

Daniel DeTone,

Pierre Moulon,

Qirui Wu,

Zhengqin Li,

Julian Straub,

Richard Newcombe,

Jakob Engel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Siddiqui_2026_CVPR,
  author    = {Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Howard-Jenkins, Henry and DeTone, Daniel and Moulon, Pierre and Wu, Qirui and Li, Zhengqin and Straub, Julian and Newcombe, Richard and Engel, Jakob},
  title     = {{ShapeR}: Robust Conditional {3D} Shape Generation from Casual Captures},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27157--27168},
}
Exploring Conditions for Diffusion Models in Robotic Control: Heeseong Shin,

Byeongho Heo,

Dongyoon Han,

Seungryong Kim,

Taekyung Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Heeseong and Heo, Byeongho and Han, Dongyoon and Kim, Seungryong and Kim, Taekyung},
  title     = {Exploring Conditions for Diffusion Models in Robotic Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27967--27977},
}
Learning What Matters: Prioritized Concept Learning via Relative Error-driven Sample Selection: Qian Yang,

Shivam Chandhok,

Oscar Mañas,

Kanishk Jain,

Aishwarya Agrawal,

Leonid Sigal; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qian and Chandhok, Shivam and Ma{\~n}as, Oscar and Jain, Kanishk and Agrawal, Aishwarya and Sigal, Leonid},
  title     = {Learning What Matters: Prioritized Concept Learning via Relative Error-driven Sample Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15794--15804},
}
Learning to Focus and Precise Cropping: A Reinforcement Learning Framework with Information Gaps and Grounding Loss for MLLMs: Xuanpu Zhao,

Zhentao Tan,

Dianmo Sheng,

Tianxiang Chen,

Yao Liu,

Yue Wu,

Tao Gong,

Qi Chu,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xuanpu and Tan, Zhentao and Sheng, Dianmo and Chen, Tianxiang and Liu, Yao and Wu, Yue and Gong, Tao and Chu, Qi and Yu, Nenghai},
  title     = {Learning to Focus and Precise Cropping: A Reinforcement Learning Framework with Information Gaps and Grounding Loss for {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25938--25947},
}
SeaCache: Spectral-Evolution-Aware Cache for Accelerating Diffusion Models: Jiwoo Chung,

Sangeek Hyun,

MinKyu Lee,

Byeongju Han,

Geonho Cha,

Dongyoon Wee,

Youngjun Hong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Lee, MinKyu and Han, Byeongju and Cha, Geonho and Wee, Dongyoon and Hong, Youngjun and Heo, Jae-Pil},
  title     = {{SeaCache}: Spectral-Evolution-Aware Cache for Accelerating Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14283--14294},
}
VideoRealBench: A Chain-of-Thought Realism Evaluation Benchmark for Generated Human-Centric Videos: Min Yang,

Xinwen Zhang,

Jialei Tang,

Xin Zhou,

Kehan Li,

Zeyi Huang,

Limin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Min and Zhang, Xinwen and Tang, Jialei and Zhou, Xin and Li, Kehan and Huang, Zeyi and Wang, Limin},
  title     = {{VideoRealBench}: A Chain-of-Thought Realism Evaluation Benchmark for Generated Human-Centric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18565--18575},
}
gQIR: Generative Quanta Image Reconstruction: Aryan Garg,

Sizhuo Ma,

Mohit Gupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Aryan and Ma, Sizhuo and Gupta, Mohit},
  title     = {{gQIR}: Generative Quanta Image Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19759--19770},
}
MatSpray: Fusing 2D Material World Knowledge on 3D Geometry: Philipp Langsteiner,

Jan-Niklas Dihlmann,

Hendrik Lensch; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Langsteiner_2026_CVPR,
  author    = {Langsteiner, Philipp and Dihlmann, Jan-Niklas and Lensch, Hendrik},
  title     = {{MatSpray}: Fusing {2D} Material World Knowledge on {3D} Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22615--22625},
}
ViRC: Enhancing Visual Interleaved Mathematical CoT with Reason Chunking: Lihong Wang,

Liangqi Li,

Weiwei Feng,

Jiamin Wu,

Changtao Miao,

Tieru Wu,

Rui Ma,

Bo Zhang,

Zhe Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lihong and Li, Liangqi and Feng, Weiwei and Wu, Jiamin and Miao, Changtao and Wu, Tieru and Ma, Rui and Zhang, Bo and Li, Zhe},
  title     = {{ViRC}: Enhancing Visual Interleaved Mathematical {CoT} with Reason Chunking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26144--26153},
}
UniTEX: Universal High Fidelity Generative Texturing for 3D Shapes: Yixun Liang,

Kunming Luo,

Xiao Chen,

Rui Chen,

Hongyu Yan,

Weiyu Li,

Jiarui Liu,

Fei-Peng Tian,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yixun and Luo, Kunming and Chen, Xiao and Chen, Rui and Yan, Hongyu and Li, Weiyu and Liu, Jiarui and Tian, Fei-Peng and Tan, Ping},
  title     = {{UniTEX}: Universal High Fidelity Generative Texturing for {3D} Shapes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19917--19927},
}
ARM-Thinker: Reinforcing Multimodal Generative Reward Models with Agentic Tool Use and Visual Reasoning: Shengyuan Ding,

Xinyu Fang,

Ziyu Liu,

Yuhang Zang,

Yuhang Cao,

Xiangyu Zhao,

Haodong Duan,

Xiaoyi Dong,

Jianze Liang,

Bin Wang,

Conghui He,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Shengyuan and Fang, Xinyu and Liu, Ziyu and Zang, Yuhang and Cao, Yuhang and Zhao, Xiangyu and Duan, Haodong and Dong, Xiaoyi and Liang, Jianze and Wang, Bin and He, Conghui and Lin, Dahua and Wang, Jiaqi},
  title     = {{ARM-Thinker}: Reinforcing Multimodal Generative Reward Models with Agentic Tool Use and Visual Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22195--22205},
}
InTrain: Intrinsic Trainability for Zero-Cost Neural Architecture Search: Qinqin Zhou,

Fuhai Chen,

Jipeng Wu,

Zhiwei Chen,

Zhikai Hu,

Weiwei Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qinqin and Chen, Fuhai and Wu, Jipeng and Chen, Zhiwei and Hu, Zhikai and Cai, Weiwei},
  title     = {{InTrain}: Intrinsic Trainability for Zero-Cost Neural Architecture Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20181--20190},
}
Contrastive Cross-Bag Augmentation for Multiple Instance Learning-based Whole Slide Image Classification: Bo Zhang,

Xinan Xu,

Shuo Yan,

Yu Bai,

Zheng Zhang,

Wufan Wang,

Hui Gao,

Wendong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bo and Xu, Xinan and Yan, Shuo and Bai, Yu and Zhang, Zheng and Wang, Wufan and Gao, Hui and Wang, Wendong},
  title     = {Contrastive Cross-Bag Augmentation for Multiple Instance Learning-based Whole Slide Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21089--21098},
}
Cluster-aware Anchor Learning for Multi-View Clustering: Zhe Chen,

Fanhui Meng,

Tianyang Xu,

Xiao-Jun Wu; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhe and Meng, Fanhui and Xu, Tianyang and Wu, Xiao-Jun},
  title     = {Cluster-aware Anchor Learning for Multi-View Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17714--17723},
}
Learning from Synthetic Data via Provenance-Based Input Gradient Guidance: Koshiro Nagano,

Ryo Fujii,

Ryo Hachiuma,

Fumiaki Sato,

Taiki Sekii,

Hideo Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagano_2026_CVPR,
  author    = {Nagano, Koshiro and Fujii, Ryo and Hachiuma, Ryo and Sato, Fumiaki and Sekii, Taiki and Saito, Hideo},
  title     = {Learning from Synthetic Data via Provenance-Based Input Gradient Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18796--18805}
}
MAGICIAN: Efficient Long-Term Planning with Imagined Gaussians for Active Mapping: Shiyao Li,

Antoine Guédon,

Shizhe Chen,

Vincent Lepetit; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shiyao and Gu{\'e}don, Antoine and Chen, Shizhe and Lepetit, Vincent},
  title     = {{MAGICIAN}: Efficient Long-Term Planning with Imagined {Gaussians} for Active Mapping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21606--21615}
}
Fast3Dcache: Training-free 3D Geometry Synthesis Acceleration: Mengyu Yang,

Yanming Yang,

Chenyi Xu,

Chenxi Song,

Yufan Zuo,

Tong Zhao,

Ruibo Li,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Mengyu and Yang, Yanming and Xu, Chenyi and Song, Chenxi and Zuo, Yufan and Zhao, Tong and Li, Ruibo and Zhang, Chi},
  title     = {{Fast3Dcache}: Training-free {3D} Geometry Synthesis Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27030--27040}
}
Fast Markov Random Field Optimisation for Topologically Noisy 3D Shape Matching: Paul Roetzer,

Johan Thunberg,

Zorah Lähner,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Roetzer_2026_CVPR,
  author    = {Roetzer, Paul and Thunberg, Johan and L{\"a}hner, Zorah and Bernard, Florian},
  title     = {Fast {Markov} Random Field Optimisation for Topologically Noisy {3D} Shape Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24162--24172}
}
MorphSeek: Fine-grained Latent Representation-Level Policy Optimization for Deformable Image Registration: Runxun Zhang,

Yizhou Liu,

Dongrui Li,

Bo Xu,

Jingwei Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Runxun and Liu, Yizhou and Li, Dongrui and Xu, Bo and Wei, Jingwei},
  title     = {{MorphSeek}: Fine-grained Latent Representation-Level Policy Optimization for Deformable Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27460--27470}
}
DVAR: Dynamic Visual Autoregressive Modeling for Image Super-Resolution: Yu Zheng,

Kai Zhang,

Wei Zhu,

Qingguo Liu,

Xiantao Hu,

Jun Li,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yu and Zhang, Kai and Zhu, Wei and Liu, Qingguo and Hu, Xiantao and Li, Jun and Yang, Jian},
  title     = {{DVAR}: Dynamic Visual Autoregressive Modeling for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23378--23387}
}
3DrawAgent: Teaching LLM to Draw in 3D with Early Contrastive Experience: Hongcan Xiao,

Xinyue Xiao,

Yilin Wang,

Yue Zhang,

Yonggang Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Hongcan and Xiao, Xinyue and Wang, Yilin and Zhang, Yue and Qi, Yonggang},
  title     = {{3DrawAgent}: Teaching {LLM} to Draw in {3D} with Early Contrastive Experience},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27179--27187}
}
GaussianGrow: Geometry-aware Gaussian Growing from 3D Point Clouds with Text Guidance: Weiqi Zhang,

Junsheng Zhou,

Haotian Geng,

Kanle Shi,

Shenkun Xu,

Yi Fang,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Shi, Kanle and Xu, Shenkun and Fang, Yi and Liu, Yu-Shen},
  title     = {{GaussianGrow}: Geometry-aware {Gaussian} Growing from {3D} Point Clouds with Text Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18968--18979}
}
REVIVE 3D: Refinement via Encoded Voluminous Inflated prior for Volume Enhancement: Hankyeol Lee,

Wooyeol Baek,

Seongdo Kim,

Jongyoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Hankyeol and Baek, Wooyeol and Kim, Seongdo and Kim, Jongyoo},
  title     = {{REVIVE} {3D}: Refinement via Encoded Voluminous Inflated prior for Volume Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26984--26994}
}
GazeShift: Unsupervised Gaze Estimation and Dataset for VR: Gil Shapira,

Ishay Goldin,

Evgeny Artyomov,

Donghoon Kim,

Yosi Keller,

Niv Zehngut; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shapira_2026_CVPR,
  author    = {Shapira, Gil and Goldin, Ishay and Artyomov, Evgeny and Kim, Donghoon and Keller, Yosi and Zehngut, Niv},
  title     = {{GazeShift}: Unsupervised Gaze Estimation and Dataset for {VR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24290--24299}
}
Layered 4D-Rotor Gaussian Splatting: A Compressed Representation for Long Dynamic Scenes: Hanjie Xu,

Yuanxing Duan,

Qiyu Dai,

Ge Li,

Baoquan Chen,

He Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Hanjie and Duan, Yuanxing and Dai, Qiyu and Li, Ge and Chen, Baoquan and Wang, He},
  title     = {Layered {4D}-Rotor {Gaussian} Splatting: A Compressed Representation for Long Dynamic Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18958--18967}
}
First Logit Boosting: Visual Grounding Method to Mitigate Object Hallucination in Large Vision-Language Models: Jiwoo Ha,

Jongwoo Baek,

Jinhyun So; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Jiwoo and Baek, Jongwoo and So, Jinhyun},
  title     = {First Logit Boosting: Visual Grounding Method to Mitigate Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18241--18250}
}
Gravitation-Driven Semantic Alignment for Text Video Retrieval: Yi Yang,

Zheng Wang,

Xing Xu,

Jingkuan Song,

Heng Tao Shen; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao},
  title     = {Gravitation-Driven Semantic Alignment for Text Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14946--14956}
}
OmniGround: A Comprehensive Spatio-Temporal Grounding Benchmark for Real-World Complex Scenarios: Hong Gao,

Jingyu Wu,

Xiangkai Xu,

Kangni Xie,

Yunchen Zhang,

Bin Zhong,

Xurui Gao,

Min-Ling Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hong and Wu, Jingyu and Xu, Xiangkai and Xie, Kangni and Zhang, Yunchen and Zhong, Bin and Gao, Xurui and Zhang, Min-Ling},
  title     = {{OmniGround}: A Comprehensive Spatio-Temporal Grounding Benchmark for Real-World Complex Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17588--17597}
}
Draft and Refine with Visual Experts: Sungheon Jeong,

Ryozo Masukawa,

Jihong Park,

Sanggeon Yun,

Wenjun Huang,

Hanning Chen,

Mahdi Imani,

Mohsen Imani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Sungheon and Masukawa, Ryozo and Park, Jihong and Yun, Sanggeon and Huang, Wenjun and Chen, Hanning and Imani, Mahdi and Imani, Mohsen},
  title     = {Draft and Refine with Visual Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18816--18826}
}
Seeing is Improving: Visual Feedback for Iterative Text Layout Refinement: Junrong Guo,

Shancheng Fang,

Yadong Qu,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Junrong and Fang, Shancheng and Qu, Yadong and Xie, Hongtao},
  title     = {Seeing is Improving: Visual Feedback for Iterative Text Layout Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25893--25903}
}
Prototype-Guided Concept Erasure in Diffusion Models: Yuze Cai,

Jiahao Lu,

Hongxiang Shi,

Yichao Zhou,

Hong Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Yuze and Lu, Jiahao and Shi, Hongxiang and Zhou, Yichao and Lu, Hong},
  title     = {Prototype-Guided Concept Erasure in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16509--16519}
}
UETrack: A Unified and Efficient Framework for Single Object Tracking: Ben Kang,

Jie Zhao,

Xin Chen,

Wanting Geng,

Bin Zhang,

Lu Zhang,

Dong Wang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Ben and Zhao, Jie and Chen, Xin and Geng, Wanting and Zhang, Bin and Zhang, Lu and Wang, Dong and Lu, Huchuan},
  title     = {{UETrack}: A Unified and Efficient Framework for Single Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20890--20901}
}
More Natural, More Real: Object-aware Gaussian Splatting for 3D Visual Decoding from Human Brain: Haodong Jing,

Dongyao Jiang,

Jixin Wang,

Junhao Jia,

Yanshu Li,

Yongqiang Ma,

Nanning Zheng; [pdf]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Haodong and Jiang, Dongyao and Wang, Jixin and Jia, Junhao and Li, Yanshu and Ma, Yongqiang and Zheng, Nanning},
  title     = {More Natural, More Real: Object-aware {Gaussian} Splatting for {3D} Visual Decoding from Human Brain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19033--19044}
}
HSI-GPT2: A Dual-Granularity Large Motion Reasoning Model with Diffusion Refinement for Human-Scene Interaction: Yuan Wang,

Xiang Li,

Yali Li,

Xuege Hou,

Shengjin Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuan and Li, Xiang and Li, Yali and Hou, Xuege and Wang, Shengjin},
  title     = {{HSI-GPT2}: A Dual-Granularity Large Motion Reasoning Model with Diffusion Refinement for Human-Scene Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16432--16442}
}
Streamlined Knowledge Distillation: Hyeon-Jin Jeong,

Han-Jin Lee,

Seok-Hwan Choi; [pdf]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Hyeon-Jin and Lee, Han-Jin and Choi, Seok-Hwan},
  title     = {Streamlined Knowledge Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26646--26655}
}
Closed-Form Concept Erasure via Double Projections: Chi Zhang,

Jingpu Cheng,

Zhixian Wang,

Ping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Cheng, Jingpu and Wang, Zhixian and Liu, Ping},
  title     = {Closed-Form Concept Erasure via Double Projections},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24503--24513}
}
Prototype-based Causal Intervention for Multi-Label Image Classification: Yanmin Li,

Zhilong Mao,

Mao Wang,

Lihua Liu,

Jibing Wu,

Weidong Bao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanmin and Mao, Zhilong and Wang, Mao and Liu, Lihua and Wu, Jibing and Bao, Weidong},
  title     = {Prototype-based Causal Intervention for Multi-Label Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24738--24747}
}
Real2Sim2Real: RetinalDepth-64K for Depth Estimation in Posterior Segment Ophthalmic Surgery: Bingwen Dong,

Gan Liu,

Xiaoxi Lu,

Guangcheng Chen,

Jialu Zhang,

Yan Hu,

Xiaoqing Zhang,

Jiang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Bingwen and Liu, Gan and Lu, Xiaoxi and Chen, Guangcheng and Zhang, Jialu and Hu, Yan and Zhang, Xiaoqing and Liu, Jiang},
  title     = {{Real2Sim2Real}: {RetinalDepth-64K} for Depth Estimation in Posterior Segment Ophthalmic Surgery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26899--26908}
}
Learning 3D Shape Fidelity Metric from Real-world Distortions: Xuelu Feng,

Tianyu Luan,

Zixin Zhu,

Akshobhya Sharma,

Phani Nuney,

Junsong Yuan,

Chunming Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Xuelu and Luan, Tianyu and Zhu, Zixin and Sharma, Akshobhya and Nuney, Phani and Yuan, Junsong and Qiao, Chunming},
  title     = {Learning {3D} Shape Fidelity Metric from Real-world Distortions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28391--28401}
}
WHU-MARS: A Multispectral Aerial-Ground Benchmark Towards Any-Scenario Person Re-Identification: Yuxuan Zhao,

Zhongao Zhou,

Bin Yang,

He Li,

Jian Liang,

Jun Chen,

Bo Du,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yuxuan and Zhou, Zhongao and Yang, Bin and Li, He and Liang, Jian and Chen, Jun and Du, Bo and Ye, Mang},
  title     = {{WHU-MARS}: A Multispectral Aerial-Ground Benchmark Towards Any-Scenario Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25461--25471}
}
SAGE: Scalable Agentic 3D Scene Generation for Embodied AI: Hongchi Xia,

Xuan Li,

Zhaoshuo Li,

Qianli Ma,

Jiashu Xu,

Ming-Yu Liu,

Yin Cui,

Tsung-Yi Lin,

Wei-Chiu Ma,

Shenlong Wang,

Shuran Song,

Fangyin Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Hongchi and Li, Xuan and Li, Zhaoshuo and Ma, Qianli and Xu, Jiashu and Liu, Ming-Yu and Cui, Yin and Lin, Tsung-Yi and Ma, Wei-Chiu and Wang, Shenlong and Song, Shuran and Wei, Fangyin},
  title     = {{SAGE}: Scalable Agentic {3D} Scene Generation for Embodied {AI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22358--22368}
}
OMGTex: One-stage Multi-style Facial Texture Reconstruction without Geometry Guidance: Zitong Xiao,

Yuda Qiu,

Zisheng Ye,

Xiaoguang Han; [pdf]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Zitong and Qiu, Yuda and Ye, Zisheng and Han, Xiaoguang},
  title     = {{OMGTex}: One-stage Multi-style Facial Texture Reconstruction without Geometry Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21242--21251}
}
Can You Learn to See Without Images? Procedural Warm-Up for Vision Transformers: Zachary Shinnick,

Liangze Jiang,

Hemanth Saratchandran,

Damien Teney,

Anton van den Hengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinnick_2026_CVPR,
  author    = {Shinnick, Zachary and Jiang, Liangze and Saratchandran, Hemanth and Teney, Damien and van den Hengel, Anton},
  title     = {Can You Learn to See Without Images? Procedural Warm-Up for Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27439--27448}
}
LagerNVS: Latent Geometry for Fully Neural Real-time Novel View Synthesis: Stanislaw Szymanowicz,

Minghao Chen,

Jianyuan Wang,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szymanowicz_2026_CVPR,
  author    = {Szymanowicz, Stanislaw and Chen, Minghao and Wang, Jianyuan and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{LagerNVS}: Latent Geometry for Fully Neural Real-time Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15443--15453}
}
Linear Image Generation by Synthesizing Exposure Brackets: Yuekun Dai,

Zhoutong Zhang,

Shangchen Zhou,

Nanxuan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Yuekun and Zhang, Zhoutong and Zhou, Shangchen and Zhao, Nanxuan},
  title     = {Linear Image Generation by Synthesizing Exposure Brackets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16206--16215}
}
Part$^{2}$GS: Part-aware Modeling of Articulated Objects using 3D Gaussian Splatting: Tianjiao Yu,

Vedant Shah,

Muntasir Wahed,

Ying Shen,

Kiet A. Nguyen,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Tianjiao and Shah, Vedant and Wahed, Muntasir and Shen, Ying and Nguyen, Kiet A. and Lourentzou, Ismini},
  title     = {{Part$^{2}$GS}: Part-aware Modeling of Articulated Objects using {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18913--18923}
}
Stake the Points: Structure-Faithful Instance Unlearning: Kiseong Hong,

JungKyoo Shin,

Eunwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Kiseong and Shin, JungKyoo and Kim, Eunwoo},
  title     = {Stake the Points: Structure-Faithful Instance Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24524--24533}
}
Phantom: Physical Object Interactions as Dynamic Triggers for NMS-Exploited Backdoors: Tianlin Huo,

Dongchuan Ran,

Ranjie Duan,

Yao Zhu,

Peilun Du,

Ningbo Yao,

Huanqian Yan,

Xu Han,

Qiang Yun,

Yuzheng Tan,

Yang Bao,

Yuan He; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Tianlin and Ran, Dongchuan and Duan, Ranjie and Zhu, Yao and Du, Peilun and Yao, Ningbo and Yan, Huanqian and Han, Xu and Yun, Qiang and Tan, Yuzheng and Bao, Yang and He, Yuan},
  title     = {Phantom: Physical Object Interactions as Dynamic Triggers for {NMS}-Exploited Backdoors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27906--27915}
}
Evolutionary Multimodal Reasoning via Hierarchical Semantic Representation for Intent Recognition: Qianrui Zhou,

Hua Xu,

Yunjin Gu,

Yifan Wang,

Songze Li,

Hanlei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qianrui and Xu, Hua and Gu, Yunjin and Wang, Yifan and Li, Songze and Zhang, Hanlei},
  title     = {Evolutionary Multimodal Reasoning via Hierarchical Semantic Representation for Intent Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14979--14989}
}
Degradation-Consistent Test-Time Adaptation for All-in-One Image Restoration: Ni Tang,

Shenghao Nie,

Xiaotong Luo,

Yuan Xie,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Ni and Nie, Shenghao and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun},
  title     = {Degradation-Consistent Test-Time Adaptation for All-in-One Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15476--15485}
}
One-to-More: High-Fidelity Training-Free Anomaly Generation with Attention Control: Haoxiang Rao,

Zhao Wang,

Chenyang Si,

Yan Lyu,

Yuanyi Duan,

Fang Zhao,

Caifeng Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2026_CVPR,
  author    = {Rao, Haoxiang and Wang, Zhao and Si, Chenyang and Lyu, Yan and Duan, Yuanyi and Zhao, Fang and Shan, Caifeng},
  title     = {One-to-More: High-Fidelity Training-Free Anomaly Generation with Attention Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28492--28501}
}
MARIS: Marine Open-Vocabulary Instance Segmentation: Bingyu Li,

Feiyu Wang,

Da Zhang,

Zhiyuan Zhao,

Junyu Gao,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingyu and Wang, Feiyu and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong},
  title     = {{MARIS}: Marine Open-Vocabulary Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24939--24949}
}
PE3R: Perception-Efficient 3D Reconstruction: Jie Hu,

Shizun Wang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Jie and Wang, Shizun and Wang, Xinchao},
  title     = {{PE3R}: Perception-Efficient {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26877--26887}
}
InstantViR: Real-Time Video Inverse Problem Solver with Distilled Diffusion Prior: Weimin Bai,

Suzhe Xu,

Yiwei Ren,

Jinhua Hao,

Ming Sun,

Wenzheng Chen,

He Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Weimin and Xu, Suzhe and Ren, Yiwei and Hao, Jinhua and Sun, Ming and Chen, Wenzheng and Sun, He},
  title     = {{InstantViR}: Real-Time Video Inverse Problem Solver with Distilled Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16583--16592}
}
HyperGait: Unleashing the Power of Parsing for Gait Recognition in the Wild via Hypergraph: Jinkai Zheng,

Jiaqing Wei,

Xinxiang Jin,

Yaoqi Sun,

Xichun Sheng,

Ming Li,

Liangqiong Qu,

Xinchen Liu,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jinkai and Wei, Jiaqing and Jin, Xinxiang and Sun, Yaoqi and Sheng, Xichun and Li, Ming and Qu, Liangqiong and Liu, Xinchen and Liu, Wu},
  title     = {{HyperGait}: Unleashing the Power of Parsing for Gait Recognition in the Wild via Hypergraph},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18513--18522}
}
TWINGS: Thin Plate Splines Warp-aligned Initialization for Sparse-View Gaussian Splatting: Hyeseong Kim,

Geonhui Son,

Deukhee Lee,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyeseong and Son, Geonhui and Lee, Deukhee and Hwang, Dosik},
  title     = {{TWINGS}: Thin Plate Splines Warp-aligned Initialization for Sparse-View {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26062--26071}
}
CUE: Concept-Aware Multi-Label Expansion to Mitigate Concept Confusion in Long-Tailed Learning: Ruichi Zhang,

Chikai Shang,

Jiacheng Yang,

Mengke Li,

Yang Zhou,

Junlong Gao,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruichi and Shang, Chikai and Yang, Jiacheng and Li, Mengke and Zhou, Yang and Gao, Junlong and Lu, Yang},
  title     = {{CUE}: Concept-Aware Multi-Label Expansion to Mitigate Concept Confusion in Long-Tailed Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15030--15039}
}
VOSR: A Vision-Only Generative Model for Image Super-Resolution: Rongyuan Wu,

Lingchen Sun,

Zhengqiang Zhang,

Xiangtao Kong,

Jixin Zhao,

Shihao Wang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Kong, Xiangtao and Zhao, Jixin and Wang, Shihao and Zhang, Lei},
  title     = {{VOSR}: A Vision-Only Generative Model for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16311--16321}
}
Mixture of Prototypes for Test-time Adaptive Segmentation: Guangrui Li,

Zhengyu Zhu,

Yongxin Ge; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangrui and Zhu, Zhengyu and Ge, Yongxin},
  title     = {Mixture of Prototypes for Test-time Adaptive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24990--25000}
}
DPGF-Net: Dual-Prior Guided Fusion Network for Joint Assessment of Perceptual Quality and Semantic Consistency in AI-Generated Images: Tao Li,

Xingran Liao,

Mingliang Zhou; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tao and Liao, Xingran and Zhou, Mingliang},
  title     = {{DPGF-Net}: Dual-Prior Guided Fusion Network for Joint Assessment of Perceptual Quality and Semantic Consistency in {AI}-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19529--19538}
}
All-in-One Slider for Attribute Manipulation in Diffusion Models: Weixin Ye,

Hongguang Zhu,

Wei Wang,

Yahui Liu,

Mengyu Wang,

Xuecheng Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Weixin and Zhu, Hongguang and Wang, Wei and Liu, Yahui and Wang, Mengyu and Nie, Xuecheng},
  title     = {All-in-One Slider for Attribute Manipulation in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18693--18702}
}
Envision, Attend, Then Respond: Counterfactual Hallucination Mitigation in Large Vision-Language Models: Yuxuan Liang,

Fan Shi,

Rui Zhu,

Xu Li,

Xiaolei Chen,

Zhe Liu,

Bin Li,

Xiangyang Xue; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yuxuan and Shi, Fan and Zhu, Rui and Li, Xu and Chen, Xiaolei and Liu, Zhe and Li, Bin and Xue, Xiangyang},
  title     = {Envision, Attend, Then Respond: Counterfactual Hallucination Mitigation in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18261--18272}
}
Intrinsic Image Fusion for Multi-View 3D Material Reconstruction: Peter Kocsis,

Lukas Höllein,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Kocsis_2026_CVPR,
  author    = {Kocsis, Peter and H{\"o}llein, Lukas and Nie{\ss}ner, Matthias},
  title     = {Intrinsic Image Fusion for Multi-View {3D} Material Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22571--22580}
}
DPAR: Dynamic Patchification for Efficient Autoregressive Visual Generation: Divyansh Srivastava,

Akshay Mehra,

Pranav Maneriker,

Debopam Sanyal,

Vishnu Raj,

Vijay Kamarshi,

Fan Du,

Joshua Kimball; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Srivastava_2026_CVPR,
  author    = {Srivastava, Divyansh and Mehra, Akshay and Maneriker, Pranav and Sanyal, Debopam and Raj, Vishnu and Kamarshi, Vijay and Du, Fan and Kimball, Joshua},
  title     = {{DPAR}: Dynamic Patchification for Efficient Autoregressive Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23215--23226}
}
PrivSynth: Alternating and Control-Based Optimization for Privacy and Utility in Synthetic Data: Xinyuan Zhao,

Hanlin Gu,

Guibao Song,

Gongxi Zhu,

Yifei Zou,

Lixin Fan,

Yuxing Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xinyuan and Gu, Hanlin and Song, Guibao and Zhu, Gongxi and Zou, Yifei and Fan, Lixin and Han, Yuxing},
  title     = {{PrivSynth}: Alternating and Control-Based Optimization for Privacy and Utility in Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17430--17439}
}
Deep Feature Deformation Weights: Richard Liu,

Itai Lang,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Richard and Lang, Itai and Hanocka, Rana},
  title     = {Deep Feature Deformation Weights},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27378--27387}
}
StoryTailor: A Zero-Shot Pipeline for Action-Rich Multi-Subject Visual Narratives: Jinghao Hu,

Yuhe Zhang,

Guohua Geng,

Kang Li,

Han Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Jinghao and Zhang, Yuhe and Geng, Guohua and Li, Kang and Zhang, Han},
  title     = {{StoryTailor}: A Zero-Shot Pipeline for Action-Rich Multi-Subject Visual Narratives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21922--21931}
}
PhysGS: Bayesian-Inferred Gaussian Splatting for Physical Property Estimation: Samarth Chopra,

Jing Liang,

Gershom Seneviratne,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chopra_2026_CVPR,
  author    = {Chopra, Samarth and Liang, Jing and Seneviratne, Gershom and Manocha, Dinesh},
  title     = {{PhysGS}: {Bayesian}-Inferred {Gaussian} Splatting for Physical Property Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18980--18990}
}
GeoBridge: A Semantic-Anchored Multi-View Foundation Model Bridging Images and Text for Geo-Localization: Zixuan Song,

Jing Zhang,

Di Wang,

Zidie Zhou,

Wenbin Liu,

Haonan Guo,

En Wang,

Bo Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Zixuan and Zhang, Jing and Wang, Di and Zhou, Zidie and Liu, Wenbin and Guo, Haonan and Wang, En and Du, Bo},
  title     = {{GeoBridge}: A Semantic-Anchored Multi-View Foundation Model Bridging Images and Text for Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27793--27803}
}
Eliciting Complex Spatial Reasoning in MLLMs through Wide-Baseline Matching: Hao Zhong,

Muzhi Zhu,

Shenyan Zeng,

Anzhou Li,

Cong Chen,

Hua Geng,

Duochao Shi,

Wentao Ye,

Tao Lin,

Hao Chen,

Chunhua Shen; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Hao and Zhu, Muzhi and Zeng, Shenyan and Li, Anzhou and Chen, Cong and Geng, Hua and Shi, Duochao and Ye, Wentao and Lin, Tao and Chen, Hao and Shen, Chunhua},
  title     = {Eliciting Complex Spatial Reasoning in {MLLMs} through Wide-Baseline Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16768--16778}
}
GraspLDP: Towards Generalizable Grasping Policy via Latent Diffusion: Enda Xiang,

Haoxiang Ma,

Xinzhu Ma,

Zicheng Liu,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Enda and Ma, Haoxiang and Ma, Xinzhu and Liu, Zicheng and Huang, Di},
  title     = {{GraspLDP}: Towards Generalizable Grasping Policy via Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28032--28041}
}
Multi-speaker Attention Alignment for Multimodal Social Interaction: Liangyang Ouyang,

Yifei Huang,

Mingfang Zhang,

Caixin Kang,

Ryosuke Furuta,

Yoichi Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Liangyang and Huang, Yifei and Zhang, Mingfang and Kang, Caixin and Furuta, Ryosuke and Sato, Yoichi},
  title     = {Multi-speaker Attention Alignment for Multimodal Social Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24608--24619}
}
Underground Plant Exploration: Non-Destructive 3D Root Assessment with GPR Based on Point Graph Neural Network: Yuwei Zhou,

Guoyu Lu; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yuwei and Lu, Guoyu},
  title     = {Underground Plant Exploration: Non-Destructive {3D} Root Assessment with {GPR} Based on Point Graph Neural Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15616--15626}
}
Few-for-Many Personalized Federated Learning: Ping Guo,

Tiantian Zhang,

Xi Lin,

Xiang Li,

Zhi-Ri Tang,

Qingfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ping and Zhang, Tiantian and Lin, Xi and Li, Xiang and Tang, Zhi-Ri and Zhang, Qingfu},
  title     = {Few-for-Many Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17515--17524}
}
AnchorSplat: Feed-Forward 3D Gaussian Splatting With 3D Geometric Priors: Xiaoxue Zhang,

Xiaoxu Zheng,

Yixuan Yin,

Tiao Zhao,

Kaihua Tang,

Michael Bi Mi,

Zhan Xu,

Dave Zhenyu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaoxue and Zheng, Xiaoxu and Yin, Yixuan and Zhao, Tiao and Tang, Kaihua and Mi, Michael Bi and Xu, Zhan and Chen, Dave Zhenyu},
  title     = {{AnchorSplat}: Feed-Forward {3D} {Gaussian} Splatting With {3D} Geometric Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18924--18933}
}
YOLO-ULM: Ultra-Lightweight Models for Real-Time Object Detection: Shasha Han,

Chong Li,

Xinning Wang,

Xuebo Li; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Shasha and Li, Chong and Wang, Xinning and Li, Xuebo},
  title     = {{YOLO-ULM}: Ultra-Lightweight Models for Real-Time Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18418--18427}
}
ProcessMaker: A Generalized Process Visualization Framework with Adaptive Sequence Steps on Diffusion Transformers: Mengling Xu,

Sisi You,

Yaning Li,

Bing-Kun Bao; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mengling and You, Sisi and Li, Yaning and Bao, Bing-Kun},
  title     = {{ProcessMaker}: A Generalized Process Visualization Framework with Adaptive Sequence Steps on Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25699--25708}
}
Recover to Predict: Progressive Retrospective Learning for Variable-Length Trajectory Prediction: Hao Zhou,

Lu Qi,

Xiangtai Li,

Jie Zhang,

Yi Liu,

Xu Yang,

Mingyu Fan,

Fei Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Hao and Qi, Lu and Li, Xiangtai and Zhang, Jie and Liu, Yi and Yang, Xu and Fan, Mingyu and Luo, Fei},
  title     = {Recover to Predict: Progressive Retrospective Learning for Variable-Length Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17863--17873}
}
CoSMo3D: Open-World Promptable 3D Semantic Segmentation through LLM-Guided Canonical Spatial Modeling: Li Jin,

Weikai Chen,

Yujie Wang,

Yingda Yin,

Zeyu Hu,

Runze Zhang,

Keyang Luo,

Shengju Qian,

Xin Wang,

Xueying Qin; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Li and Chen, Weikai and Wang, Yujie and Yin, Yingda and Hu, Zeyu and Zhang, Runze and Luo, Keyang and Qian, Shengju and Wang, Xin and Qin, Xueying},
  title     = {{CoSMo3D}: Open-World Promptable {3D} Semantic Segmentation through {LLM}-Guided Canonical Spatial Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14325--14334}
}
Learning to Solve PDEs on Neural Shape Representations: Lilian Welschinger,

Yilin Liu,

Zican Wang,

Niloy J. Mitra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Welschinger_2026_CVPR,
  author    = {Welschinger, Lilian and Liu, Yilin and Wang, Zican and Mitra, Niloy J.},
  title     = {Learning to Solve {PDEs} on Neural Shape Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20263--20272}
}
Mimic Human Cognition, Master Multi-Image Reasoning: A Meta-Action Framework for Enhanced Visual Understanding: Jianghao Yin,

Qingbin Li,

Kun Sun,

Cheng Ding,

Jie Wang,

Qin Chen,

Jie Zhou,

Nan Wang,

Changqing Li,

Pei Wu,

Jian Xu,

Zheming Yang,

Liang He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Jianghao and Li, Qingbin and Sun, Kun and Ding, Cheng and Wang, Jie and Chen, Qin and Zhou, Jie and Wang, Nan and Li, Changqing and Wu, Pei and Xu, Jian and Yang, Zheming and He, Liang},
  title     = {Mimic Human Cognition, Master Multi-Image Reasoning: A Meta-Action Framework for Enhanced Visual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19253--19264}
}
M4-RAG: A Massive-Scale Multilingual Multi-Cultural Multimodal RAG: David Anugraha,

Patrick Amadeus Irawan,

Anshul Singh,

En-Shiun Annie Lee,

Genta Indra Winata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anugraha_2026_CVPR,
  author    = {Anugraha, David and Irawan, Patrick Amadeus and Singh, Anshul and Lee, En-Shiun Annie and Winata, Genta Indra},
  title     = {{M4-RAG}: A Massive-Scale Multilingual Multi-Cultural Multimodal {RAG}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23083--23094}
}
Hypergraph-State Collaborative Reasoning for Multi-Object Tracking: Zikai Song,

Junqing Yu,

Yi-Ping Phoebe Chen,

Wei Yang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Zikai and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei and Wang, Xinchao},
  title     = {Hypergraph-State Collaborative Reasoning for Multi-Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28123--28133}
}
M^3KG-RAG: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation: Hyeongcheol Park,

Jiyoung Seo,

Jaewon Mun,

Hogun Park,

Wonmin Byeon,

Sung June Kim,

Hyeonsoo Im,

JeungSub Lee,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyeongcheol and Seo, Jiyoung and Mun, Jaewon and Park, Hogun and Byeon, Wonmin and Kim, Sung June and Im, Hyeonsoo and Lee, JeungSub and Kim, Sangpil},
  title     = {{M$^3$KG-RAG}: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14968--14978}
}
Foundation Encoders Are All You Need for Preference-Aware Personalization: Hyungjin Kim,

Seokho Ahn,

Young-Duk Seo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyungjin and Ahn, Seokho and Seo, Young-Duk},
  title     = {Foundation Encoders Are All You Need for Preference-Aware Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14692--14701}
}
RealUnify: Do Unified Models Truly Benefit from Unification? A Comprehensive Benchmark: Yang Shi,

Yuhao Dong,

Yue Ding,

Yuran Wang,

Xuanyu Zhu,

Sheng Zhou,

Wenting Liu,

Haochen Tian,

Rundong Wang,

Huanqian Wang,

Zuyan Liu,

Bohan Zeng,

Ruizhe Chen,

Qixun Wang,

Zhuoran Zhang,

Xinlong Chen,

Chengzhuo Tong,

Bozhou Li,

Qiang Liu,

Haotian Wang,

Wenjing Yang,

Yuanxing Zhang,

Pengfei Wan,

Yi-Fan Zhang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yang and Dong, Yuhao and Ding, Yue and Wang, Yuran and Zhu, Xuanyu and Zhou, Sheng and Liu, Wenting and Tian, Haochen and Wang, Rundong and Wang, Huanqian and Liu, Zuyan and Zeng, Bohan and Chen, Ruizhe and Wang, Qixun and Zhang, Zhuoran and Chen, Xinlong and Tong, Chengzhuo and Li, Bozhou and Liu, Qiang and Wang, Haotian and Yang, Wenjing and Zhang, Yuanxing and Wan, Pengfei and Zhang, Yi-Fan and Liu, Ziwei},
  title     = {{RealUnify}: Do Unified Models Truly Benefit from Unification? {A} Comprehensive Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22488--22497}
}
MagicQuill V2: Precise and Interactive Image Editing with Layered Visual Cues: Zichen Liu,

Yue Yu,

Hao Ouyang,

Qiuyu Wang,

Shuailei Ma,

Ka Leong Cheng,

Wen Wang,

Qingyan Bai,

Yuxuan Zhang,

Yanhong Zeng,

Yixuan Li,

Xing Zhu,

Yujun Shen,

Qifeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Ma, Shuailei and Cheng, Ka Leong and Wang, Wen and Bai, Qingyan and Zhang, Yuxuan and Zeng, Yanhong and Li, Yixuan and Zhu, Xing and Shen, Yujun and Chen, Qifeng},
  title     = {{MagicQuill} {V2}: Precise and Interactive Image Editing with Layered Visual Cues},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22467--22477}
}
Node-RF: Learning Generalized Continuous Space-Time Scene Dynamics with Neural ODE-based NeRFs: Hiran Sarkar,

Liming Kuang,

Yordanka Velikova,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarkar_2026_CVPR,
  author    = {Sarkar, Hiran and Kuang, Liming and Velikova, Yordanka and Busam, Benjamin},
  title     = {{Node-RF}: Learning Generalized Continuous Space-Time Scene Dynamics with Neural {ODE}-based {NeRFs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15411--15420}
}
EI-Part: Explode for Completion and Implode for Refinement: Wanhu Sun,

Zhongjin Luo,

Heliang Zheng,

Jiahao Chang,

Chongjie Ye,

Huiang He,

Shengchu Zhao,

Rongfei Jia,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Wanhu and Luo, Zhongjin and Zheng, Heliang and Chang, Jiahao and Ye, Chongjie and He, Huiang and Zhao, Shengchu and Jia, Rongfei and Han, Xiaoguang},
  title     = {{EI-Part}: Explode for Completion and Implode for Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27007--27017}
}
PCA-Seg: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation: Jianjian Yin,

Tao Chen,

Yi Chen,

Gensheng Pei,

Xiangbo Shu,

Yazhou Yao,

Fumin Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Jianjian and Chen, Tao and Chen, Yi and Pei, Gensheng and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin},
  title     = {{PCA-Seg}: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27633--27643}
}
LaRP: Efficient Multi-View Inpainting with Latent Reprojection Priors: Gaoyang Zhang,

Xinguo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Gaoyang and Liu, Xinguo},
  title     = {{LaRP}: Efficient Multi-View Inpainting with Latent Reprojection Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21772--21783}
}
Geometry-Aware Cross-Modal Graph Alignment for Referring Segmentation in 3D Gaussian Splatting: Yuwen Tao,

Kanglei Zhou,

Chang Li,

Liyuan Wang; [pdf]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Yuwen and Zhou, Kanglei and Li, Chang and Wang, Liyuan},
  title     = {Geometry-Aware Cross-Modal Graph Alignment for Referring Segmentation in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20433--20442}
}
Predicting Spatial Transcriptomics from Histology Images via High-Order Multi-Cell Interaction Modeling: Youhan Sun,

Jiahua Rao,

Kangrui Du,

Jiancong Xie,

Yuedong Yang; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Youhan and Rao, Jiahua and Du, Kangrui and Xie, Jiancong and Yang, Yuedong},
  title     = {Predicting Spatial Transcriptomics from Histology Images via High-Order Multi-Cell Interaction Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19781--19790}
}
Breaking Semantic Boundaries: Distribution-Guided Semantic Exploration for Creative Generation: Fu Feng,

Yucheng Xie,

Ruixiao Shi,

Xu Yang,

Jing Wang,

Xin Geng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Fu and Xie, Yucheng and Shi, Ruixiao and Yang, Xu and Wang, Jing and Geng, Xin},
  title     = {Breaking Semantic Boundaries: Distribution-Guided Semantic Exploration for Creative Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14253--14262}
}
Echoes of Ownership: Adversarial-Guided Dual Injection for Copyright Protection in MLLMs: Chengwei Xia,

Fan Ma,

Ruijie Quan,

Yunqiu Xu,

Kun Zhan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chengwei and Ma, Fan and Quan, Ruijie and Xu, Yunqiu and Zhan, Kun and Yang, Yi},
  title     = {Echoes of Ownership: Adversarial-Guided Dual Injection for Copyright Protection in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20650--20659}
}
CI-VID: A Coherent Interleaved Text-Video Dataset: Yiming Ju,

Jijin Hu,

Zhengxiong Luo,

Haoge Deng,

Hanyu Zhao,

Li Du,

Wenbo Xiao,

Chengwei Wu,

Donglin Hao,

Xinlong Wang,

Tengfei Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2026_CVPR,
  author    = {Ju, Yiming and Hu, Jijin and Luo, Zhengxiong and Deng, Haoge and Zhao, Hanyu and Du, Li and Xiao, Wenbo and Wu, Chengwei and Hao, Donglin and Wang, Xinlong and Pan, Tengfei},
  title     = {{CI-VID}: A Coherent Interleaved Text-Video Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25568--25577}
}
AdaSVD: Singular Value Decomposition with Adaptive Mechanisms for Large Multimodal Models: Zhiteng Li,

Mingyuan Xia,

Jingyuan Zhang,

Zheng Hui,

Haotong Qin,

Linghe Kong,

Yulun Zhang,

Xiaokang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiteng and Xia, Mingyuan and Zhang, Jingyuan and Hui, Zheng and Qin, Haotong and Kong, Linghe and Zhang, Yulun and Yang, Xiaokang},
  title     = {{AdaSVD}: Singular Value Decomposition with Adaptive Mechanisms for Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26358--26368}
}
Beyond Prompt Degradation: Prototype-guided Dual-pool Prompting for Incremental Object Detection: Yaoteng Zhang,

Qing Zhou,

Junyu Gao,

Qi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yaoteng and Zhou, Qing and Gao, Junyu and Wang, Qi},
  title     = {Beyond Prompt Degradation: Prototype-guided Dual-pool Prompting for Incremental Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27568--27578}
}
MergeVLA: Cross-Skill Model Merging Toward a Generalist Vision-Language-Action Agent: Yuxia Fu,

Zhizhen Zhang,

Yuqi Zhang,

Zijian Wang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yuxia and Zhang, Zhizhen and Zhang, Yuqi and Wang, Zijian and Huang, Zi and Luo, Yadan},
  title     = {{MergeVLA}: Cross-Skill Model Merging Toward a Generalist Vision-Language-Action Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22335--22347}
}
HieraMamba: Video Temporal Grounding via Hierarchical Anchor-Mamba Pooling: Joungbin An,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Joungbin and Grauman, Kristen},
  title     = {{HieraMamba}: Video Temporal Grounding via Hierarchical Anchor-{Mamba} Pooling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16954--16965}
}
WeDetect: Fast Open-Vocabulary Object Detection as Retrieval: Shenghao Fu,

Yukun Su,

Fengyun Rao,

Jing LYU,

Xiaohua Xie,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Shenghao and Su, Yukun and Rao, Fengyun and LYU, Jing and Xie, Xiaohua and Zheng, Wei-Shi},
  title     = {{WeDetect}: Fast Open-Vocabulary Object Detection as Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20377--20387}
}
Guardians of the Hair: Rescuing Soft Boundaries in Depth, Stereo, and Novel Views: Xiang Zhang,

Yang Zhang,

Lukas Mehl,

Markus Gross,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiang and Zhang, Yang and Mehl, Lukas and Gross, Markus and Schroers, Christopher},
  title     = {Guardians of the Hair: Rescuing Soft Boundaries in Depth, Stereo, and Novel Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19822--19832}
}
Content-Adaptive Hierarchical Hyperprior for Neural Video Coding: Junqi Liao,

Yaojun Wu,

Chaoyi Lin,

Zhipin Deng,

Li Li,

Dong Liu,

Xiaoyan Sun; [pdf]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Junqi and Wu, Yaojun and Lin, Chaoyi and Deng, Zhipin and Li, Li and Liu, Dong and Sun, Xiaoyan},
  title     = {Content-Adaptive Hierarchical Hyperprior for Neural Video Coding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20953--20962}
}
Rethinking Prompt Design for Inference-time Scaling in Text-to-Visual Generation: Subin Kim,

Sangwoo Mo,

Mamshad Nayeem Rizve,

Yiran Xu,

Difan Liu,

Jinwoo Shin,

Tobias Hinz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Subin and Mo, Sangwoo and Rizve, Mamshad Nayeem and Xu, Yiran and Liu, Difan and Shin, Jinwoo and Hinz, Tobias},
  title     = {Rethinking Prompt Design for Inference-time Scaling in Text-to-Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22090--22099}
}
Spike-driven Discrete Aggregation for Event-based Object Detection: Huaning Li,

Ziming Wang,

Runhao Jiang,

Yan Rui,

Huajin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Huaning and Wang, Ziming and Jiang, Runhao and Rui, Yan and Tang, Huajin},
  title     = {Spike-driven Discrete Aggregation for Event-based Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15135--15144}
}
Reframing Long-Tailed Learning via Loss Landscape Geometry: Shenghan Chen,

Yiming Liu,

Yanzhen Wang,

Yujia Wang,

Xiankai Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shenghan and Liu, Yiming and Wang, Yanzhen and Wang, Yujia and Lu, Xiankai},
  title     = {Reframing Long-Tailed Learning via Loss Landscape Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22227--22237}
}
Precise Object and Effect Removal with Adaptive Target-Aware Attention: Jixin Zhao,

Zhouxia Wang,

Peiqing Yang,

Shangchen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jixin and Wang, Zhouxia and Yang, Peiqing and Zhou, Shangchen},
  title     = {Precise Object and Effect Removal with Adaptive Target-Aware Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19370--19379}
}
EgoEdit: Dataset, Real-Time Streaming Model, and Benchmark for Egocentric Video Editing: Runjia Li,

Moayed Haji-Ali,

Ashkan Mirzaei,

Chaoyang Wang,

Arpit Sahni,

Ivan Skorokhodov,

Aliaksandr Siarohin,

Tomas Jakab,

Junlin Han,

Sergey Tulyakov,

Philip Torr,

Willi Menapace; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Runjia and Haji-Ali, Moayed and Mirzaei, Ashkan and Wang, Chaoyang and Sahni, Arpit and Skorokhodov, Ivan and Siarohin, Aliaksandr and Jakab, Tomas and Han, Junlin and Tulyakov, Sergey and Torr, Philip and Menapace, Willi},
  title     = {{EgoEdit}: Dataset, Real-Time Streaming Model, and Benchmark for Egocentric Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16042--16053}
}
SceMoS: Scene-Aware 3D Human Motion Synthesis by Planning with Geometry-Grounded Tokens: Anindita Ghosh,

Vladislav Golyanik,

Taku Komura,

Philipp Slusallek,

Christian Theobalt,

Rishabh Dabral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Anindita and Golyanik, Vladislav and Komura, Taku and Slusallek, Philipp and Theobalt, Christian and Dabral, Rishabh},
  title     = {{SceMoS}: Scene-Aware {3D} Human Motion Synthesis by Planning with Geometry-Grounded Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16443--16453}
}
TriDF: Evaluating Perception, Detection, and Hallucination for Interpretable DeepFake Detection: Jian-Yu Jiang-Lin,

Kang-Yang Huang,

Ling Zou,

Ling Lo,

Sheng-Ping Yang,

Yu-Wen Tseng,

Kun-Hsiang Lin,

Chia-Ling Chen,

Yu-Ting Ta,

Yan-Tsung Wang,

Po-Ching Chen,

Hongxia Xie,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang-Lin_2026_CVPR,
  author    = {Jiang-Lin, Jian-Yu and Huang, Kang-Yang and Zou, Ling and Lo, Ling and Yang, Sheng-Ping and Tseng, Yu-Wen and Lin, Kun-Hsiang and Chen, Chia-Ling and Ta, Yu-Ting and Wang, Yan-Tsung and Chen, Po-Ching and Xie, Hongxia and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {{TriDF}: Evaluating Perception, Detection, and Hallucination for Interpretable {DeepFake} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17087--17098}
}
Re-evaluating Continual VQA: Toward Fair and Robust Evaluation for Multimodal Continual Learning: Zijian Gao,

Zicheng Sun,

Xingxing Zhang,

Kele Xu,

Huaimin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Zijian and Sun, Zicheng and Zhang, Xingxing and Xu, Kele and Wang, Huaimin},
  title     = {Re-evaluating Continual {VQA}: Toward Fair and Robust Evaluation for Multimodal Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18021--18031}
}
RefTon: Reference person shot assist virtual Try-on: Liuzhuozheng Li,

Yue Gong,

Shanyuan Liu,

Zanyi Wang,

Dengyang Jiang,

Leibucha Wu,

Bo Cheng,

Yuhang Ma,

Dawei Leng,

Yuhui Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Liuzhuozheng and Gong, Yue and Liu, Shanyuan and Wang, Zanyi and Jiang, Dengyang and Wu, Leibucha and Cheng, Bo and Ma, Yuhang and Leng, Dawei and Yin, Yuhui},
  title     = {{RefTon}: Reference person shot assist virtual {Try-on}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14915--14925}
}
Let VLMs Grade Their Own Thoughts: A Self-Quantification Approach to Reasoning-Aware Reward Modeling: Xing Xi,

Yu Qiu,

Ronghua Luo,

Peixian Chen,

Peilin Tong; [pdf] [supp]
[bibtex]
@InProceedings{Xi_2026_CVPR,
  author    = {Xi, Xing and Qiu, Yu and Luo, Ronghua and Chen, Peixian and Tong, Peilin},
  title     = {Let {VLMs} Grade Their Own Thoughts: A Self-Quantification Approach to Reasoning-Aware Reward Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26487--26496}
}
PSR: Scaling Multi-Subject Personalized Image Generation with Pairwise Subject-Consistency Rewards: Shulei Wang,

Longhui Wei,

Xin He,

Jianbo Ouyang,

Hui Lu,

Zhou Zhao,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shulei and Wei, Longhui and He, Xin and Ouyang, Jianbo and Lu, Hui and Zhao, Zhou and Tian, Qi},
  title     = {{PSR}: Scaling Multi-Subject Personalized Image Generation with Pairwise Subject-Consistency Rewards},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14756--14766}
}
UniMMAD: Unified Multi-Modal and Multi-Class Anomaly Detection via MoE-Driven Feature Decompression: Yuan Zhao,

Youwei Pang,

Lihe Zhang,

Hanqi Liu,

Jiaming Zuo,

Huchuan Lu,

Xiaoqi Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yuan and Pang, Youwei and Zhang, Lihe and Liu, Hanqi and Zuo, Jiaming and Lu, Huchuan and Zhao, Xiaoqi},
  title     = {{UniMMAD}: Unified Multi-Modal and Multi-Class Anomaly Detection via {MoE}-Driven Feature Decompression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28502--28511}
}
Where MLLMs Attend and What They Rely On: Explaining Autoregressive Token Generation: Ruoyu Chen,

Xiaoqing Guo,

Kangwei Liu,

Siyuan Liang,

Shiming Liu,

Qunli Zhang,

Laiyuan Wang,

Hua Zhang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ruoyu and Guo, Xiaoqing and Liu, Kangwei and Liang, Siyuan and Liu, Shiming and Zhang, Qunli and Wang, Laiyuan and Zhang, Hua and Cao, Xiaochun},
  title     = {Where {MLLMs} Attend and What They Rely On: Explaining Autoregressive Token Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17057--17066}
}
StructXLIP: Enhancing Vision-language Models with Multimodal Structural Cues: Zanxi Ruan,

Songqun Gao,

Qiuyu Kong,

Yiming Wang,

Marco Cristani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Zanxi and Gao, Songqun and Kong, Qiuyu and Wang, Yiming and Cristani, Marco},
  title     = {{StructXLIP}: Enhancing Vision-language Models with Multimodal Structural Cues},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17292--17302}
}
M3DocDep: Multi-modal, Multi-page, Multi-document Dependency Chunking with Large Vision-Language Models: Joongmin Shin,

Jeongbae Park,

Jaehyung Seo,

Heuiseok Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Joongmin and Park, Jeongbae and Seo, Jaehyung and Lim, Heuiseok},
  title     = {{M3DocDep}: Multi-modal, Multi-page, Multi-document Dependency Chunking with Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16603--16613}
}
PoseAnything: General Pose-guided Video Generation with Part-aware Temporal Coherence: Ruiyan Wang,

Teng Hu,

Kaihui Huang,

Zihan Su,

Ran Yi,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ruiyan and Hu, Teng and Huang, Kaihui and Su, Zihan and Yi, Ran and Ma, Lizhuang},
  title     = {{PoseAnything}: General Pose-guided Video Generation with Part-aware Temporal Coherence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23193--23203}
}
Learning to Adapt: Self-Improving Web Agent via Cognitive-Aware Exploration: Weile Chen,

Bingchen Miao,

Qifan Yu,

Wendong Bu,

Guoming Wang,

Wenqiao Zhang,

Shengyu Zhang,

Juncheng Li,

Siliang Tang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Weile and Miao, Bingchen and Yu, Qifan and Bu, Wendong and Wang, Guoming and Zhang, Wenqiao and Zhang, Shengyu and Li, Juncheng and Tang, Siliang},
  title     = {Learning to Adapt: Self-Improving Web Agent via Cognitive-Aware Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22402--22411}
}
Muses: Designing, Composing, Generating Nonexistent Fantasy 3D Creatures without Training: Hexiao Lu,

Xiaokun Sun,

Zeyu Cai,

Hao Guo,

Ying Tai,

Jian Yang,

Zhenyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Hexiao and Sun, Xiaokun and Cai, Zeyu and Guo, Hao and Tai, Ying and Yang, Jian and Zhang, Zhenyu},
  title     = {{Muses}: Designing, Composing, Generating Nonexistent Fantasy {3D} Creatures without Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26995--27006}
}
CoFiDA-M: Concept-Aware Feature Modulation for Cross-Domain Adaptation with Image-Only Inference: Nurjahan Sultana,

Moi Hoon Yap,

Xinqi Fan,

Wenqi Lu; [pdf] [supp]
[bibtex]
@InProceedings{Sultana_2026_CVPR,
  author    = {Sultana, Nurjahan and Yap, Moi Hoon and Fan, Xinqi and Lu, Wenqi},
  title     = {{CoFiDA-M}: Concept-Aware Feature Modulation for Cross-Domain Adaptation with Image-Only Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15060--15069}
}
GSV2X: Geometry-Aware Uncertainty Modeling and Orthogonal Fusion for Robust Roadside Perception: Jianqiang Xu,

Gensheng Pei,

Huafeng Liu,

Yazhou Yao; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Jianqiang and Pei, Gensheng and Liu, Huafeng and Yao, Yazhou},
    title     = {{GSV2X}: Geometry-Aware Uncertainty Modeling and Orthogonal Fusion for Robust Roadside Perception},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21409--21419}
}
SIMPLEPOSTER: A SIMPLE BASELINE FOR PRODUCT POSTER GENERATION: Benlei Cui,

Fangao Zeng,

Weitao Jiang,

Yuwen Zhai,

Haiwen Hong,

Longtao Huang,

Hui Xue,

Wenxiang Shang,

Pipei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
    author    = {Cui, Benlei and Zeng, Fangao and Jiang, Weitao and Zhai, Yuwen and Hong, Haiwen and Huang, Longtao and Xue, Hui and Shang, Wenxiang and Huang, Pipei},
    title     = {SIMPLEPOSTER: A SIMPLE BASELINE FOR PRODUCT POSTER GENERATION},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14854--14863}
}
Towards Policy-Adaptive Image Guardrail: Benchmark and Method: Caiyong Piao,

Zhiyuan Yan,

Haoming Xu,

Yunzhen Zhao,

Kaiqing Lin,

Feiyang Xu,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Piao_2026_CVPR,
    author    = {Piao, Caiyong and Yan, Zhiyuan and Xu, Haoming and Zhao, Yunzhen and Lin, Kaiqing and Xu, Feiyang and Zhou, Shuigeng},
    title     = {Towards Policy-Adaptive Image Guardrail: Benchmark and Method},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16614--16623}
}
Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection: Ahyoung Oh,

Wonseok Shin,

Songkuk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oh_2026_CVPR,
    author    = {Oh, Ahyoung and Shin, Wonseok and Kim, Songkuk},
    title     = {Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19298--19307}
}
NAMI: Efficient Image Generation via Bridged Progressive Rectified Flow Transformers: Yuhang Ma,

Bo Cheng,

Shanyuan Liu,

Hongyi Zhou,

Liebucha Wu,

Dawei Leng,

Yuhui Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yuhang and Cheng, Bo and Liu, Shanyuan and Zhou, Hongyi and Wu, Liebucha and Leng, Dawei and Yin, Yuhui},
    title     = {{NAMI}: Efficient Image Generation via Bridged Progressive Rectified Flow Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25667--25676}
}
Temporal Equilibrium MeanFlow: Bridging the Scale Gap for One-Step Generation: Yuanpeng Tu,

Yunpeng Chen,

Xinyu Zhang,

Chao Liao,

Hengshuang Zhao; [pdf]
[bibtex]
@InProceedings{Tu_2026_CVPR,
    author    = {Tu, Yuanpeng and Chen, Yunpeng and Zhang, Xinyu and Liao, Chao and Zhao, Hengshuang},
    title     = {Temporal Equilibrium {MeanFlow}: Bridging the Scale Gap for One-Step Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16064--16073}
}
Remedying Target-Domain Astigmatism for Cross-Domain Few-Shot Object Detection: Yongwei Jiang,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Yongwei and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
    title     = {Remedying Target-Domain Astigmatism for Cross-Domain Few-Shot Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19580--19590}
}
Stealing Split Learning Bottom Models by Recovering Embedding Geometry: Qinbo Zhang,

Yanhang Shi,

Ziyi Zhang,

Hao Wang,

Sai Qian Zhang,

Jian Li; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Qinbo and Shi, Yanhang and Zhang, Ziyi and Wang, Hao and Zhang, Sai Qian and Li, Jian},
    title     = {Stealing Split Learning Bottom Models by Recovering Embedding Geometry},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20660--20669}
}
SuP: Sub-cloud Driven Point Cloud Registration: Sheldon Fung,

Wei Pan,

Ling Cao,

Fei Hou,

Ling Chen,

Shasha Mao,

Hongdong Li,

Xuequan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Fung_2026_CVPR,
    author    = {Fung, Sheldon and Pan, Wei and Cao, Ling and Hou, Fei and Chen, Ling and Mao, Shasha and Li, Hongdong and Lu, Xuequan},
    title     = {{SuP}: Sub-cloud Driven Point Cloud Registration},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24185--24194}
}
Low-Resolution Editing is All You Need for High-Resolution Editing: Junsung Lee,

Hyunsoo Lee,

Yong Jae Lee,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Junsung and Lee, Hyunsoo and Lee, Yong Jae and Han, Bohyung},
    title     = {Low-Resolution Editing is All You Need for High-Resolution Editing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16216--16225}
}
Parse, Search, and Confirmation: Training-Free Aerial Vision-and-Dialog Navigation with Chain-of-Thought Reasoning and Structured Spatial Memory: Yu Qi,

Hongyu Li,

Shaofei Huang,

Tianrui Hui,

Yaxiong Wang,

Lechao Cheng,

Zhun Zhong,

Si Liu,

Meng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
    author    = {Qi, Yu and Li, Hongyu and Huang, Shaofei and Hui, Tianrui and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Liu, Si and Wang, Meng},
    title     = {Parse, Search, and Confirmation: Training-Free Aerial Vision-and-Dialog Navigation with Chain-of-Thought Reasoning and Structured Spatial Memory},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23859--23868}
}
Revisiting F-measure Optimization in Multi-Label Classification: A Sampling-based Approach: Zixun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zixun},
    title     = {Revisiting {F-measure} Optimization in Multi-Label Classification: A Sampling-based Approach},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16845--16854}
}
WiTTA-Bench: Benchmarking Test-Time Adaptation for WiFi Sensing: Bing Li,

Qiang Wang,

Junda Lu,

Le Zhang,

Yun Liu,

Ce Zhu,

Wei Cui; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Bing and Wang, Qiang and Lu, Junda and Zhang, Le and Liu, Yun and Zhu, Ce and Cui, Wei},
    title     = {{WiTTA-Bench}: Benchmarking Test-Time Adaptation for {WiFi} Sensing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18461--18470}
}
FluoCLIP: Stain-Aware Focus Quality Assessment in Fluorescence Microscopy: Hyejin Park,

Jiwon Yoon,

Sumin Park,

Suree Kim,

Sinae Jang,

Eunsoo Lee,

Dongmin Kang,

Dongbo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Hyejin and Yoon, Jiwon and Park, Sumin and Kim, Suree and Jang, Sinae and Lee, Eunsoo and Kang, Dongmin and Min, Dongbo},
    title     = {{FluoCLIP}: Stain-Aware Focus Quality Assessment in Fluorescence Microscopy},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {28288--28297}
}
PoseD-Flow: Versatile and Guided Flow Matching Model of Human Pose: Jebastin Nadar,

Simone Foti,

Tolga Birdal; [pdf] [supp]
[bibtex]
@InProceedings{Nadar_2026_CVPR,
    author    = {Nadar, Jebastin and Foti, Simone and Birdal, Tolga},
    title     = {{PoseD-Flow}: Versatile and Guided Flow Matching Model of Human Pose},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21165--21175}
}
Toward Real-world Infrared Image Super-Resolution: A Unified Autoregressive Framework and Benchmark Dataset: Yang Zou,

Jun Ma,

Zhidong Jiao,

Xingyuan Li,

Zhiying Jiang,

Jinyuan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
    author    = {Zou, Yang and Ma, Jun and Jiao, Zhidong and Li, Xingyuan and Jiang, Zhiying and Liu, Jinyuan},
    title     = {Toward Real-world Infrared Image Super-Resolution: A Unified Autoregressive Framework and Benchmark Dataset},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16365--16375}
}
Cell-Type Prototype-Informed Neural Network for Gene Expression Estimation from Pathology Images: Kazuya Nishimura,

Ryoma Bise,

Shinnosuke Matsuo,

Haruka Hirose,

Yasuhiro Kojima; [pdf] [arXiv]
[bibtex]
@InProceedings{Nishimura_2026_CVPR,
    author    = {Nishimura, Kazuya and Bise, Ryoma and Matsuo, Shinnosuke and Hirose, Haruka and Kojima, Yasuhiro},
    title     = {Cell-Type Prototype-Informed Neural Network for Gene Expression Estimation from Pathology Images},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19801--19811}
}
SelecTKD: Selective Token-Weighted Knowledge Distillation for LLMs: Haiduo Huang,

Jiangcheng Song,

Yadong Zhang,

Pengju Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Ren, Pengju},
    title     = {{SelecTKD}: Selective Token-Weighted Knowledge Distillation for {LLMs}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19287--19297}
}
Prune2Drive: A Plug-and-Play Framework for Accelerating Vision-Language Models in Autonomous Driving: Minhao Xiong,

Zichen Wen,

Zhuangcheng Gu,

Xuyang Liu,

Rui Zhang,

Hengrui Kang,

Jiabing Yang,

Junyuan Zhang,

Weijia Li,

Conghui He,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
    author    = {Xiong, Minhao and Wen, Zichen and Gu, Zhuangcheng and Liu, Xuyang and Zhang, Rui and Kang, Hengrui and Yang, Jiabing and Zhang, Junyuan and Li, Weijia and He, Conghui and Zhang, Linfeng},
    title     = {{Prune2Drive}: A Plug-and-Play Framework for Accelerating Vision-Language Models in Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25215--25224}
}
VGGT-360: Geometry-Consistent Zero-Shot Panoramic Depth Estimation: Jiayi Yuan,

Haobo Jiang,

De Wen Soh,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
    author    = {Yuan, Jiayi and Jiang, Haobo and Soh, De Wen and Zhao, Na},
    title     = {{VGGT-360}: Geometry-Consistent Zero-Shot Panoramic Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19874--19883}
}
LitePT: Lighter Yet Stronger Point Transformer: Yuanwen Yue,

Damien Robert,

Jianyuan Wang,

Sunghwan Hong,

Jan Dirk Wegner,

Christian Rupprecht,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2026_CVPR,
    author    = {Yue, Yuanwen and Robert, Damien and Wang, Jianyuan and Hong, Sunghwan and Wegner, Jan Dirk and Rupprecht, Christian and Schindler, Konrad},
    title     = {{LitePT}: Lighter Yet Stronger Point Transformer},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24173--24184}
}
Domain-Skewed Federated Learning with Feature Decoupling and Calibration: Huan Wang,

Jun Shen,

Jun Yan,

Guansong Pang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Huan and Shen, Jun and Yan, Jun and Pang, Guansong},
    title     = {Domain-Skewed Federated Learning with Feature Decoupling and Calibration},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17484--17493}
}
MoBind: Motion Binding for Fine-Grained IMU-Video Pose Alignment: Duc Duy Nguyen,

Tat-Jun Chin,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Duc Duy and Chin, Tat-Jun and Hoai, Minh},
    title     = {{MoBind}: Motion Binding for Fine-Grained {IMU}-Video Pose Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22123--22132}
}
Beyond the Static World: Continual Category Discovery under Visual Drift: Wei Feng,

Yiwen Jiang,

Sijin Zhou,

Zongyuan Ge; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Wei and Jiang, Yiwen and Zhou, Sijin and Ge, Zongyuan},
    title     = {Beyond the Static World: Continual Category Discovery under Visual Drift},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25032--25042}
}
Visual Personalization Turing Test: Rameen Abdal,

James Burgess,

Sergey Tulyakov,

Kuan-Chieh Jackson Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abdal_2026_CVPR,
    author    = {Abdal, Rameen and Burgess, James and Tulyakov, Sergey and Wang, Kuan-Chieh Jackson},
    title     = {Visual Personalization {Turing} Test},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14789--14799}
}
Multimodal Distribution Matching for Vision-Language Dataset Distillation: Jongoh Jeong,

Hoyong Kwon,

Minseok Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
    author    = {Jeong, Jongoh and Kwon, Hoyong and Kim, Minseok and Yoon, Kuk-Jin},
    title     = {Multimodal Distribution Matching for Vision-Language Dataset Distillation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23072--23082}
}
HAMMER: Harnessing MLLMs via Cross-Modal Integration for Intention-Driven 3D Affordance Grounding: Lei Yao,

Yong Chen,

Yuejiao Su,

Yi Wang,

Moyun Liu,

Lap-Pui Chau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
    author    = {Yao, Lei and Chen, Yong and Su, Yuejiao and Wang, Yi and Liu, Moyun and Chau, Lap-Pui},
    title     = {{HAMMER}: Harnessing {MLLMs} via Cross-Modal Integration for Intention-Driven {3D} Affordance Grounding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23805--23815}
}
Localizing, Structuring, and Rendering: Bridging 3D and 2D Vision-Language-Action Models for Robotic Manipulation: Yunlong Zhao,

Xiaoheng Deng,

Yichao Cao,

Yi Chen,

Xiangjian He,

Shan You,

Shuo Yang,

Lei Fan,

Fei Wang,

Xiu Su; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Yunlong and Deng, Xiaoheng and Cao, Yichao and Chen, Yi and He, Xiangjian and You, Shan and Yang, Shuo and Fan, Lei and Wang, Fei and Su, Xiu},
    title     = {Localizing, Structuring, and Rendering: Bridging {3D} and {2D} Vision-Language-Action Models for Robotic Manipulation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20812--20822}
}
Masked-Diffusion Autoencoders for 3D Medical Vision Representation Learning: Jiachen Tu,

Guanghui Qin,

Theodore Zhengde Zhao,

Jeya Maria Jose Valanarasu,

Sheng Zhang,

Tristan Naumann,

Fan Lam,

Sheng Wang,

Hoifung Poon; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2026_CVPR,
    author    = {Tu, Jiachen and Qin, Guanghui and Zhao, Theodore Zhengde and Valanarasu, Jeya Maria Jose and Zhang, Sheng and Naumann, Tristan and Lam, Fan and Wang, Sheng and Poon, Hoifung},
    title     = {Masked-Diffusion Autoencoders for {3D} Medical Vision Representation Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22804--22815}
}
Hidden Monotonicity: Explaining Deep Neural Networks via their DC Decomposition: Jakob Paul Zimmermann,

Georg Loho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zimmermann_2026_CVPR,
    author    = {Zimmermann, Jakob Paul and Loho, Georg},
    title     = {Hidden Monotonicity: Explaining Deep Neural Networks via their {DC} Decomposition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24107--24117}
}
DriveVLN: Towards Mapless Vision-and-Language Navigation in Autonomous Driving: Dongqian Guo,

Haoran Wei,

Wencheng Han,

Runzhou Tao,

Zhongying Qiu,

Jianfei Yang,

Jianbing Shen; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Dongqian and Wei, Haoran and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing},
    title     = {{DriveVLN}: Towards Mapless Vision-and-Language Navigation in Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25174--25183}
}
TGT: Text-Grounded Trajectories for Locally Controlled Video Generation: Guofeng Zhang,

Angtian Wang,

Jacob Zhiyuan Fang,

Liming Jiang,

Haotian Yang,

Bo Liu,

Yiding Yang,

Guang Chen,

Longyin Wen,

Alan Yuille,

Chongyang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Guofeng and Wang, Angtian and Fang, Jacob Zhiyuan and Jiang, Liming and Yang, Haotian and Liu, Bo and Yang, Yiding and Chen, Guang and Wen, Longyin and Yuille, Alan and Ma, Chongyang},
    title     = {{TGT}: Text-Grounded Trajectories for Locally Controlled Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22028--22037}
}
Talk2Move: Reinforcement Learning for Text-Instructed Object-Level Geometric Transformation in Scenes: Jing Tan,

Zhaoyang Zhang,

Yantao Shen,

Jiarui Cai,

Shuo Yang,

Jiajun Wu,

Wei Xia,

Zhuowen Tu,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Jing and Zhang, Zhaoyang and Shen, Yantao and Cai, Jiarui and Yang, Shuo and Wu, Jiajun and Xia, Wei and Tu, Zhuowen and Soatto, Stefano},
    title     = {{Talk2Move}: Reinforcement Learning for Text-Instructed Object-Level Geometric Transformation in Scenes},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14735--14745}
}
Linear Fundamental Matrix Estimation from 7 or 5 Points: Taci Ata Kucukpinar,

Juan Mogollon,

Joshua Fraser,

Timothy Duff,

Kannappan Palaniappan; [pdf] [supp]
[bibtex]
@InProceedings{Kucukpinar_2026_CVPR,
    author    = {Kucukpinar, Taci Ata and Mogollon, Juan and Fraser, Joshua and Duff, Timothy and Palaniappan, Kannappan},
    title     = {Linear Fundamental Matrix Estimation from 7 or 5 Points},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21464--21473}
}
Training-free Motion Factorization for Compositional Video Generation: Zixuan Wang,

Ziqin Zhou,

Feng Chen,

Duo Peng,

Yixin Hu,

Changsheng Li,

Yinjie Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zixuan and Zhou, Ziqin and Chen, Feng and Peng, Duo and Hu, Yixin and Li, Changsheng and Lei, Yinjie},
    title     = {Training-free Motion Factorization for Compositional Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23139--23149}
}
Decouple Your Discovery and Memory in Continual Generalized Category Discovery: Jiawei Yu,

Zijian Gao,

Xingxing Zhang,

Xuan Liu,

Huaimin Wang,

Kele Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Jiawei and Gao, Zijian and Zhang, Xingxing and Liu, Xuan and Wang, Huaimin and Xu, Kele},
    title     = {Decouple Your Discovery and Memory in Continual Generalized Category Discovery},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25022--25031}
}
Synthesizing Visual Concepts as Vision-Language Programs: Antonia Wüst,

Wolfgang Stammer,

Hikaru Shindo,

Lukas Helff,

Devendra Singh Dhami,

Kristian Kersting; [pdf] [supp]
[bibtex]
@InProceedings{Wust_2026_CVPR,
    author    = {W{\"u}st, Antonia and Stammer, Wolfgang and Shindo, Hikaru and Helff, Lukas and Dhami, Devendra Singh and Kersting, Kristian},
    title     = {Synthesizing Visual Concepts as Vision-Language Programs},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17346--17356}
}
Learning Scene Coordinate Reconstruction from Unposed Images via Pose Graph Optimization: Tze Ho Elden Tse,

Jizong Peng,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Tse_2026_CVPR,
    author    = {Tse, Tze Ho Elden and Peng, Jizong and Yao, Angela},
    title     = {Learning Scene Coordinate Reconstruction from Unposed Images via Pose Graph Optimization},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21816--21825}
}
Memory-Augmented Scene Understanding and Exploration for Open-World Aerial Object-Goal Navigation: Jiacong Zhou,

Jiaxu Miao,

Yourun Lin,

Xianyun Wang,

Jun Xiao,

Jun Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Jiacong and Miao, Jiaxu and Lin, Yourun and Wang, Xianyun and Xiao, Jun and Yu, Jun},
    title     = {Memory-Augmented Scene Understanding and Exploration for Open-World Aerial Object-Goal Navigation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21616--21626}
}
GTR-Turbo: Merged Checkpoint is Secretly a Free Teacher for Agentic VLM Training: Tong Wei,

Yijun Yang,

Changhao Zhang,

Junliang Xing,

Yuanchun Shi,

Zongqing Lu,

Deheng Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Tong and Yang, Yijun and Zhang, Changhao and Xing, Junliang and Shi, Yuanchun and Lu, Zongqing and Ye, Deheng},
    title     = {{GTR-Turbo}: Merged Checkpoint is Secretly a Free Teacher for Agentic {VLM} Training},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {26476--26486}
}
ReMoGen: Real-time Human Interaction-to-Reaction Generation via Modular Learning from Diverse Data: Yaoqin Ye,

Yiteng Xu,

Qin Sun,

Xinge Zhu,

Yujing Sun,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Yaoqin and Xu, Yiteng and Sun, Qin and Zhu, Xinge and Sun, Yujing and Ma, Yuexin},
    title     = {{ReMoGen}: Real-time Human Interaction-to-Reaction Generation via Modular Learning from Diverse Data},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16475--16485}
}
PALM: Progress-Aware Policy Learning via Affordance Reasoning for Long-Horizon Robotic Manipulation: Yuanzhe Liu,

Jingyuan Zhu,

Yuchen Mo,

Gen Li,

Xu Cao,

Jin Jin,

Yifan Shen,

Zhengyuan Li,

Tianjiao Yu,

Wenzhen Yuan,

Fangqiang Ding,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yuanzhe and Zhu, Jingyuan and Mo, Yuchen and Li, Gen and Cao, Xu and Jin, Jin and Shen, Yifan and Li, Zhengyuan and Yu, Tianjiao and Yuan, Wenzhen and Ding, Fangqiang and Lourentzou, Ismini},
    title     = {{PALM}: Progress-Aware Policy Learning via Affordance Reasoning for Long-Horizon Robotic Manipulation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {28096--28110}
}
DSFlash: Comprehensive Panoptic Scene Graph Generation in Realtime: Julian Lorenz,

Vladyslav Kovganko,

Elias Kohout,

Mrunmai Phatak,

Daniel Kienzle,

Rainer Lienhart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lorenz_2026_CVPR,
    author    = {Lorenz, Julian and Kovganko, Vladyslav and Kohout, Elias and Phatak, Mrunmai and Kienzle, Daniel and Lienhart, Rainer},
    title     = {{DSFlash}: Comprehensive Panoptic Scene Graph Generation in Realtime},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17388--17398}
}
Reliev3R: Relieving Feed-forward 3D Reconstruction from Multi-View Geometric Annotations: Youyu Chen,

Junjun Jiang,

Yueru Luo,

Kui Jiang,

Xianming Liu,

Xu Yan,

Dave Zhenyu Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Youyu and Jiang, Junjun and Luo, Yueru and Jiang, Kui and Liu, Xianming and Yan, Xu and Chen, Dave Zhenyu},
    title     = {{Reliev3R}: Relieving Feed-forward {3D} Reconstruction from Multi-View Geometric Annotations},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21860--21869}
}
AdapTok: Learning Adaptive and Temporally Causal Video Tokenization in a 1D Latent Space: Yan Li,

Changyao Tian,

Renqiu Xia,

Ning Liao,

Weiwei Guo,

Hongsheng Li,

Jifeng Dai,

Hao Li,

Xue Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yan and Tian, Changyao and Xia, Renqiu and Liao, Ning and Guo, Weiwei and Li, Hongsheng and Dai, Jifeng and Li, Hao and Yang, Xue},
    title     = {{AdapTok}: Learning Adaptive and Temporally Causal Video Tokenization in a {1D} Latent Space},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16163--16172}
}
Attend Before Attention: Efficient and Scalable Video Understanding via Autoregressive Gazing: Baifeng Shi,

Stephanie Fu,

Long Lian,

Hanrong Ye,

David Eigen,

Aaron Reite,

Jan Kautz,

Boyi Li,

David M. Chan,

Trevor Darrell,

Pavlo Molchanov,

Hongxu Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Baifeng and Fu, Stephanie and Lian, Long and Ye, Hanrong and Eigen, David and Reite, Aaron and Kautz, Jan and Li, Boyi and Chan, David M. and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu},
    title     = {Attend Before Attention: Efficient and Scalable Video Understanding via Autoregressive Gazing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17022--17034}
}
Text-guided Feature Disentanglement for Cross-modal Gait Recognition: Zhiyang Lu,

Ming Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Zhiyang and Cheng, Ming},
    title     = {Text-guided Feature Disentanglement for Cross-modal Gait Recognition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25484--25493}
}
Point Cloud as a Foreign Language for Multi-modal Large Language Model: Sneha Paul,

Zachary Patterson,

Nizar Bouguila; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paul_2026_CVPR,
    author    = {Paul, Sneha and Patterson, Zachary and Bouguila, Nizar},
    title     = {Point Cloud as a Foreign Language for Multi-modal Large Language Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16676--16687}
}
Training-Free Open-Vocabulary Camouflaged Object Segmentation via Fine-Grained Object Binding and Adaptive Hybrid Prompt: Peng Ren,

Cheng Jiang,

Chuande Yang,

Fuming Sun,

Tian Bai; [pdf]
[bibtex]
@InProceedings{Ren_2026_CVPR,
    author    = {Ren, Peng and Jiang, Cheng and Yang, Chuande and Sun, Fuming and Bai, Tian},
    title     = {Training-Free Open-Vocabulary Camouflaged Object Segmentation via Fine-Grained Object Binding and Adaptive Hybrid Prompt},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24960--24969}
}
RAVEN: Radar Adaptive Vision Encoders for Efficient Chirp-wise Object Detection and Segmentation: Anuvab Sen,

Mir Sayeed Mohammad,

Saibal Mukhopadhyay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sen_2026_CVPR,
    author    = {Sen, Anuvab and Mohammad, Mir Sayeed and Mukhopadhyay, Saibal},
    title     = {{RAVEN}: Radar Adaptive Vision Encoders for Efficient Chirp-wise Object Detection and Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17938--17947}
}
Generative Video Motion Editing with 3D Point Tracks: Yao-Chih Lee,

Zhoutong Zhang,

Jiahui Huang,

Jui-Hsien Wang,

Joon-Young Lee,

Jia-Bin Huang,

Eli Shechtman,

Zhengqi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Yao-Chih and Zhang, Zhoutong and Huang, Jiahui and Wang, Jui-Hsien and Lee, Joon-Young and Huang, Jia-Bin and Shechtman, Eli and Li, Zhengqi},
    title     = {Generative Video Motion Editing with {3D} Point Tracks},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18306--18318}
}
Event6D: Event-based Novel Object 6D Pose Tracking: Jae-Young Kang,

Hoonhee Cho,

Taeyeop Lee,

Minjun Kang,

Bowen Wen,

Youngho Kim,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
    author    = {Kang, Jae-Young and Cho, Hoonhee and Lee, Taeyeop and Kang, Minjun and Wen, Bowen and Kim, Youngho and Yoon, Kuk-Jin},
    title     = {{Event6D}: Event-based Novel Object {6D} Pose Tracking},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {15091--15104}
}
FLOW: Optimal Transport-Driven Feature Warping for Generalized Remote Physiological Measurement: Bo Zhao,

Junzhe Cao,

Dan Guo,

Dongmin Huang,

Wenjin Wang,

Tao Tan,

Yue Sun,

Zitong Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Bo and Cao, Junzhe and Guo, Dan and Huang, Dongmin and Wang, Wenjin and Tan, Tao and Sun, Yue and Yu, Zitong},
    title     = {{FLOW}: Optimal Transport-Driven Feature Warping for Generalized Remote Physiological Measurement},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {28481--28491}
}
Region-Wise Correspondence Prediction between Manga Line Art Images: Yingxuan Li,

Jiafeng Mao,

Qianru Qiu,

Yusuke Matsui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yingxuan and Mao, Jiafeng and Qiu, Qianru and Matsui, Yusuke},
    title     = {Region-Wise Correspondence Prediction between Manga Line Art Images},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {15334--15342}
}
Unsupervised Multi-agent and Single-agent Perception from Cooperative Views: Haochen Yang,

Baolu Li,

Lei Li,

Delin Ren,

Jiacheng Guo,

Minghai Qin,

Tianyun Zhang,

Hongkai Yu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Haochen and Li, Baolu and Li, Lei and Ren, Delin and Guo, Jiacheng and Qin, Minghai and Zhang, Tianyun and Yu, Hongkai},
    title     = {Unsupervised Multi-agent and Single-agent Perception from Cooperative Views},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25810--25819},
}
MeanFuser: Fast One-Step Multi-Modal Trajectory Generation and Adaptive Reconstruction via MeanFlow for End-to-End Autonomous Driving: Junli Wang,

Yinan Zheng,

Xueyi Liu,

Zebin Xing,

Pengfei Li,

Kun Ma,

Hangjun Ye,

Guang Chen,

Guang Li,

Long Chen,

Zhongpu Xia,

Qichao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Junli and Zheng, Yinan and Liu, Xueyi and Xing, Zebin and Li, Pengfei and Ma, Kun and Ye, Hangjun and Chen, Guang and Li, Guang and Chen, Long and Xia, Zhongpu and Zhang, Qichao},
    title     = {{MeanFuser}: Fast One-Step Multi-Modal Trajectory Generation and Adaptive Reconstruction via {MeanFlow} for End-to-End Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17884--17893},
}
CLIP-like Model as a Foundational Density Ratio Estimator: Fumiya Uchiyama,

Rintaro Yanagi,

Shohei Taniguchi,

Shota Takashiro,

Masahiro Suzuki,

Hirokatsu Kataoka,

Yusuke Iwasawa,

Yutaka Matsuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Uchiyama_2026_CVPR,
    author    = {Uchiyama, Fumiya and Yanagi, Rintaro and Taniguchi, Shohei and Takashiro, Shota and Suzuki, Masahiro and Kataoka, Hirokatsu and Iwasawa, Yusuke and Matsuo, Yutaka},
    title     = {{CLIP}-like Model as a Foundational Density Ratio Estimator},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15784--15793},
}
Edges Compete for Trust: Group Relative Edge Optimization for Building Reconstruction from Point Clouds: Yujun Liu,

Ruisheng Wang,

Xiang Ao,

Haoyuan Shen,

Kuihao Wang,

Kun Zhou,

Qingquan Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yujun and Wang, Ruisheng and Ao, Xiang and Shen, Haoyuan and Wang, Kuihao and Zhou, Kun and Li, Qingquan},
    title     = {Edges Compete for Trust: Group Relative Edge Optimization for Building Reconstruction from Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17121--17131},
}
CycleManip: Enabling Cycle-based Manipulation via Effective History Perception and Understanding: Yi-Lin Wei,

Haoran Liao,

Yuhao Lin,

Pengyue Wang,

Zhizhao Liang,

Guiliang Liu,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Yi-Lin and Liao, Haoran and Lin, Yuhao and Wang, Pengyue and Liang, Zhizhao and Liu, Guiliang and Zheng, Wei-Shi},
    title     = {{CycleManip}: Enabling Cycle-based Manipulation via Effective History Perception and Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20780--20789},
}
Wave-Former: Through-Occlusion 3D Reconstruction via Wireless Shape Completion: Laura Dodds,

Maisy Lam,

Waleed Akbar,

Yibo Cheng,

Fadel Adib; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dodds_2026_CVPR,
    author    = {Dodds, Laura and Lam, Maisy and Akbar, Waleed and Cheng, Yibo and Adib, Fadel},
    title     = {{Wave-Former}: Through-Occlusion {3D} Reconstruction via Wireless Shape Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21713--21724},
}
ELVIS: Enhance Low-Light for Video Instance Segmentation in the Dark: Joanne Lin,

Ruirui Lin,

Yini Li,

David Bull,

Nantheera Anantrasirichai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Joanne and Lin, Ruirui and Li, Yini and Bull, David and Anantrasirichai, Nantheera},
    title     = {{ELVIS}: Enhance Low-Light for Video Instance Segmentation in the Dark},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25011--25021},
}
End-to-End Hyper-Relational Information Extraction for Engineering Diagrams via Dynamically Tokenized Relation Transformer: Tianyou Bai,

Yan-Ming Zhang,

Zixiang Zhang,

Jibin Zhou,

Fei Yin,

Cheng-Lin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
    author    = {Bai, Tianyou and Zhang, Yan-Ming and Zhang, Zixiang and Zhou, Jibin and Yin, Fei and Liu, Cheng-Lin},
    title     = {End-to-End Hyper-Relational Information Extraction for Engineering Diagrams via Dynamically Tokenized Relation Transformer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24438--24448},
}
SkySense-VITA: Towards Universal In-context Segmentation of Multi-modal Remote Sensing Imagery: Kang Wu,

Lei Yu,

Junwei Luo,

Bo Dang,

Junjian Zhang,

Xiangyuan Cai,

Hongwei Hu,

Jingdong Chen,

Yansheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Kang and Yu, Lei and Luo, Junwei and Dang, Bo and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Chen, Jingdong and Li, Yansheng},
    title     = {{SkySense-VITA}: Towards Universal In-context Segmentation of Multi-modal Remote Sensing Imagery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20553--20563},
}
Phrase-grounded APO for Improving Chest X-ray Report Generation: Raziuddin Mahmood,

Tanveer Syeda-Mahmood; [pdf]
[bibtex]
@InProceedings{Mahmood_2026_CVPR,
    author    = {Mahmood, Raziuddin and Syeda-Mahmood, Tanveer},
    title     = {Phrase-grounded {APO} for Improving Chest {X-ray} Report Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28254--28263},
}
MMLandmarks: a Cross-View Instance-Level Benchmark for Geo-Spatial Understanding: Oskar Kristoffersen,

Alba Reinders Sánchez,

Morten Rieger Hannemose,

Anders Bjorholm Dahl,

Dim P. Papadopoulos; [pdf] [supp]
[bibtex]
@InProceedings{Kristoffersen_2026_CVPR,
    author    = {Kristoffersen, Oskar and S{\'a}nchez, Alba Reinders and Hannemose, Morten Rieger and Dahl, Anders Bjorholm and Papadopoulos, Dim P.},
    title     = {{MMLandmarks}: a Cross-View Instance-Level Benchmark for Geo-Spatial Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26452--26464},
}
Flow Matching for Multimodal Distributions: Gaoxiang Luo,

Frank Cole,

Sihang Zhang,

Yuxiang Wan,

Yulong Lu,

Ju Sun; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Gaoxiang and Cole, Frank and Zhang, Sihang and Wan, Yuxiang and Lu, Yulong and Sun, Ju},
    title     = {Flow Matching for Multimodal Distributions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23260--23271},
}
ParkGaussian: Surround-view 3D Gaussian Splatting for Autonomous Parking: Xiaobao Wei,

Zhangjie Ye,

Yuxiang Gu,

Zunjie Zhu,

Yunfei Guo,

Yingying Shen,

Shan Zhao,

Ming Lu,

Haiyang Sun,

Bing Wang,

Guang Chen,

Rongfeng Lu,

Hangjun Ye; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Xiaobao and Ye, Zhangjie and Gu, Yuxiang and Zhu, Zunjie and Guo, Yunfei and Shen, Yingying and Zhao, Shan and Lu, Ming and Sun, Haiyang and Wang, Bing and Chen, Guang and Lu, Rongfeng and Ye, Hangjun},
    title     = {{ParkGaussian}: Surround-view {3D} {Gaussian} Splatting for Autonomous Parking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19085--19095},
}
Hearing the Room Through the Shape of the Drum: Modal-Guided Sound Recovery from Multi-Point Surface Vibrations: Shai Bagon,

Matan Kichler,

Mark Sheinin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bagon_2026_CVPR,
    author    = {Bagon, Shai and Kichler, Matan and Sheinin, Mark},
    title     = {Hearing the Room Through the Shape of the Drum: Modal-Guided Sound Recovery from Multi-Point Surface Vibrations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14451--14460},
}
MorphAny3D: Unleashing the Power of Structured Latent in 3D Morphing: Xiaokun Sun,

Zeyu Cai,

Hao Tang,

Ying Tai,

Jian Yang,

Zhenyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Xiaokun and Cai, Zeyu and Tang, Hao and Tai, Ying and Yang, Jian and Zhang, Zhenyu},
    title     = {{MorphAny3D}: Unleashing the Power of Structured Latent in {3D} Morphing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27018--27029},
}
Push-and-Step: From RL-Based Balance Recovery to Physical Simulation of Dense Crowds: Alexis Jensen,

Pei Xu,

Ioannis Karamouzas,

Charles Pontonnier,

Julien Pettré; [pdf] [supp]
[bibtex]
@InProceedings{Jensen_2026_CVPR,
    author    = {Jensen, Alexis and Xu, Pei and Karamouzas, Ioannis and Pontonnier, Charles and Pettr{\'e}, Julien},
    title     = {Push-and-Step: From {RL}-Based Balance Recovery to Physical Simulation of Dense Crowds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16387--16397},
}
HFR and HDR Video from Multi-Attenuated Spikes Using a Rapidly Rotating SpokeND Filter: Yakun Chang,

Zhaojun Huang,

Siqi Yang,

Yeliduosi Xiaokaiti,

Shikui Wei,

Yao Zhao,

Tiejun Huang,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Yakun and Huang, Zhaojun and Yang, Siqi and Xiaokaiti, Yeliduosi and Wei, Shikui and Zhao, Yao and Huang, Tiejun and Shi, Boxin},
    title     = {{HFR} and {HDR} Video from Multi-Attenuated Spikes Using a Rapidly Rotating {SpokeND} Filter},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19716--19725},
}
FEAST: Fully Connected Expressive Attention for Spatial Transcriptomics: Taejin Jeong,

Joohyeok Kim,

Jinyeong Kim,

Chanyoung Kim,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
    author    = {Jeong, Taejin and Kim, Joohyeok and Kim, Jinyeong and Kim, Chanyoung and Hwang, Seong Jae},
    title     = {{FEAST}: Fully Connected Expressive Attention for Spatial Transcriptomics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26793--26802},
}
InterAgent: Physics-based Multi-agent Command Execution via Diffusion on Interaction Graphs: Bin Li,

Ruichi Zhang,

Han Liang,

Jingyan Zhang,

Juze Zhang,

Xin Chen,

Lan Xu,

Jingyi Yu,

Jingya Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Bin and Zhang, Ruichi and Liang, Han and Zhang, Jingyan and Zhang, Juze and Chen, Xin and Xu, Lan and Yu, Jingyi and Wang, Jingya},
    title     = {{InterAgent}: Physics-based Multi-agent Command Execution via Diffusion on Interaction Graphs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15253--15265},
}
Open-world Hand-Object Interaction Video Generation Based on Structure and Contact-aware Representation: Haodong Yan,

Hang Yu,

Zhide Zhong,

Weilin Yuan,

Xin Gong,

Zehang Luo,

Chengxi Heyu,

Junfeng Li,

Wenxuan Song,

Shunbo Zhou,

Haoang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Haodong and Yu, Hang and Zhong, Zhide and Yuan, Weilin and Gong, Xin and Luo, Zehang and Heyu, Chengxi and Li, Junfeng and Song, Wenxuan and Zhou, Shunbo and Li, Haoang},
    title     = {Open-world Hand-Object Interaction Video Generation Based on Structure and Contact-aware Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16031--16041},
}
Unifying Language-Action Understanding and Generation for Autonomous Driving: Xinyang Wang,

Qian Liu,

Wenjie Ding,

Zhao Yang,

Wei Li,

Chang Liu,

Bailin Li,

Kun Zhan,

Xianpeng Lang,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xinyang and Liu, Qian and Ding, Wenjie and Yang, Zhao and Li, Wei and Liu, Chang and Li, Bailin and Zhan, Kun and Lang, Xianpeng and Chen, Wei},
    title     = {Unifying Language-Action Understanding and Generation for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25193--25203},
}
Tackling Model Bias via Game-theoretic Multi-agent Collaboration Framework for Hateful Meme Classification: Yiwei Wei,

Zhengliang Guo,

Shaozu Yuan,

Chengyin Hu,

Zhiyang Jia,

Jiujiang Guo,

Meng Chen,

Peiying Wang,

Longbiao Wang; [pdf]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Yiwei and Guo, Zhengliang and Yuan, Shaozu and Hu, Chengyin and Jia, Zhiyang and Guo, Jiujiang and Chen, Meng and Wang, Peiying and Wang, Longbiao},
    title     = {Tackling Model Bias via Game-theoretic Multi-agent Collaboration Framework for Hateful Meme Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22143--22152},
}
ProFocus: Proactive Perception and Focused Reasoning in Vision-and-Language Navigation: Wei Xue,

Mingcheng Li,

Xuecheng Wu,

Jingqun Tang,

Dingkang Yang,

Lihua Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Wei and Li, Mingcheng and Wu, Xuecheng and Tang, Jingqun and Yang, Dingkang and Zhang, Lihua},
    title     = {{ProFocus}: Proactive Perception and Focused Reasoning in Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18129--18139},
}
PackUV: Packed Gaussian UV Maps for 4D Volumetric Video: Aashish Rai,

Angela Xing,

Anushka Agarwal,

Xiaoyan Cong,

Zekun Li,

Tao Lu,

Aayush Prakash,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rai_2026_CVPR,
    author    = {Rai, Aashish and Xing, Angela and Agarwal, Anushka and Cong, Xiaoyan and Li, Zekun and Lu, Tao and Prakash, Aayush and Sridhar, Srinath},
    title     = {{PackUV}: Packed {Gaussian} {UV} Maps for {4D} Volumetric Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22581--22593},
}
Ego2Web: A Web Agent Benchmark Grounded in Egocentric Videos: Shoubin Yu,

Lei Shu,

Antoine Yang,

Yao Fu,

Srinivas Sunkara,

Maria Wang,

Jindong Chen,

Mohit Bansal,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Shoubin and Shu, Lei and Yang, Antoine and Fu, Yao and Sunkara, Srinivas and Wang, Maria and Chen, Jindong and Bansal, Mohit and Gong, Boqing},
    title     = {{Ego2Web}: A Web Agent Benchmark Grounded in Egocentric Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25633--25643},
}
Unlocking Strong Supervision: A Data-Centric Study of General-Purpose Audio Pre-Training Methods: Xuanru Zhou,

Yiwen Shao,

Wei-Cheng Tseng,

Dong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xuanru and Shao, Yiwen and Tseng, Wei-Cheng and Yu, Dong},
    title     = {Unlocking Strong Supervision: A Data-Centric Study of General-Purpose Audio Pre-Training Methods},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24780--24791},
}
UniDAC: Universal Metric Depth Estimation for Any Camera: Girish Chandar Ganesan,

Yuliang Guo,

Liu Ren,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganesan_2026_CVPR,
    author    = {Ganesan, Girish Chandar and Guo, Yuliang and Ren, Liu and Liu, Xiaoming},
    title     = {{UniDAC}: Universal Metric Depth Estimation for Any Camera},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26953--26963},
}
Beyond Scanpaths: Graph-Based Gaze Simulation in Dynamic Scenes: Luke Palmer,

Petar Palasek,

Hazem Abdelkawy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Palmer_2026_CVPR,
    author    = {Palmer, Luke and Palasek, Petar and Abdelkawy, Hazem},
    title     = {Beyond Scanpaths: Graph-Based Gaze Simulation in Dynamic Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15199--15211},
}
StreamingTOM: Streaming Token Compression for Efficient Video Understanding: Xueyi Chen,

Keda Tao,

Kele Shao,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xueyi and Tao, Keda and Shao, Kele and Wang, Huan},
    title     = {{StreamingTOM}: Streaming Token Compression for Efficient Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24675--24685},
}
Haptic Neural Fields: Bringing Tactile Interactions to 3D Rendered Scenes: Antonio Luigi Stefani,

Niccolò Bisagno,

Nicola Conci,

Eckehard Steinbach,

Francesco De Natale; [pdf] [supp]
[bibtex]
@InProceedings{Stefani_2026_CVPR,
    author    = {Stefani, Antonio Luigi and Bisagno, Niccol{\`o} and Conci, Nicola and Steinbach, Eckehard and De Natale, Francesco},
    title     = {Haptic Neural Fields: Bringing Tactile Interactions to {3D} Rendered Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16021--16030},
}
HUMORCHAIN: Theory-Guided Multi-Stage Reasoning for Interpretable Multimodal Humor Generation: Jiajun Zhang,

Shijia Luo,

Ruikang Zhang,

Qi Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiajun and Luo, Shijia and Zhang, Ruikang and Su, Qi},
    title     = {{HUMORCHAIN}: Theory-Guided Multi-Stage Reasoning for Interpretable Multimodal Humor Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19176--19185},
}
Goal Force: Teaching Video Models To Accomplish Physics-Conditioned Goals: Nate Gillman,

Yinghua Zhou,

Zitian Tang,

Evan Luo,

Arjan Chakravarthy,

Daksh Aggarwal,

Michael Freeman,

Chen Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gillman_2026_CVPR,
    author    = {Gillman, Nate and Zhou, Yinghua and Tang, Zitian and Luo, Evan and Chakravarthy, Arjan and Aggarwal, Daksh and Freeman, Michael and Sun, Chen},
    title     = {Goal Force: Teaching Video Models To Accomplish Physics-Conditioned Goals},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20077--20087},
}
Selectively Extracting and Injecting Visual Attributes into Text-to-Image Models: Seunghwan Choi,

Jooyeol Yun,

Youngdo Lee,

Jaegul Choo; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Seunghwan and Yun, Jooyeol and Lee, Youngdo and Choo, Jaegul},
    title     = {Selectively Extracting and Injecting Visual Attributes into Text-to-Image Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21976--21985},
}
LoFA: Learning to Predict Personalized Prior for Fast Adaptation of Visual Generative Models: Yiming Hao,

Mutian Xu,

Chongjie Ye,

Jie Qin,

Shunlin Lu,

Yipeng Qin,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2026_CVPR,
    author    = {Hao, Yiming and Xu, Mutian and Ye, Chongjie and Qin, Jie and Lu, Shunlin and Qin, Yipeng and Han, Xiaoguang},
    title     = {{LoFA}: Learning to Predict Personalized Prior for Fast Adaptation of Visual Generative Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21986--21996},
}
Face-Guided Sentiment Boundary Enhancement for Weakly-Supervised Temporal Sentiment Localization: Cailing Han,

Zhangbin Li,

Jinxing Zhou,

Wei Qian,

Jingjing Hu,

Yanghao Zhou,

Zhangling Duan,

Dan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Cailing and Li, Zhangbin and Zhou, Jinxing and Qian, Wei and Hu, Jingjing and Zhou, Yanghao and Duan, Zhangling and Guo, Dan},
    title     = {Face-Guided Sentiment Boundary Enhancement for Weakly-Supervised Temporal Sentiment Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24759--24769},
}
SIMSPINE: A Biomechanics-Aware Simulation Framework for 3D Spine Motion Annotation and Benchmarking: Muhammad Saif Ullah Khan,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2026_CVPR,
    author    = {Khan, Muhammad Saif Ullah and Stricker, Didier},
    title     = {{SIMSPINE}: A Biomechanics-Aware Simulation Framework for {3D} Spine Motion Annotation and Benchmarking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21176--21187},
}
Composing Concepts from Images and Videos via Concept-prompt Binding: Xianghao Kong,

Zeyu Zhang,

Yuwei Guo,

Zhuoran Zhao,

Songchun Zhang,

Anyi Rao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
    author    = {Kong, Xianghao and Zhang, Zeyu and Guo, Yuwei and Zhao, Zhuoran and Zhang, Songchun and Rao, Anyi},
    title     = {Composing Concepts from Images and Videos via Concept-prompt Binding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14800--14810},
}
AlcheMinT: Fine-grained Temporal Control for Multi-Reference Consistent Video Generation: Sharath Girish,

Viacheslav Ivanov,

Tsai-Shien Chen,

Hao Chen,

Aliaksandr Siarohin,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Girish_2026_CVPR,
    author    = {Girish, Sharath and Ivanov, Viacheslav and Chen, Tsai-Shien and Chen, Hao and Siarohin, Aliaksandr and Tulyakov, Sergey},
    title     = {{AlcheMinT}: Fine-grained Temporal Control for Multi-Reference Consistent Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23227--23237},
}
Understanding the Role of Hallucination in Reinforcement Post-Training of Multimodal Reasoning Models: Gengwei Zhang,

Jie Peng,

Zhen Tan,

Mufan Qiu,

Hossein Nourkhiz Mahjoub,

Vaishnav Tadiparthi,

Kwonjoon Lee,

Yanyong Zhang,

Tianlong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Gengwei and Peng, Jie and Tan, Zhen and Qiu, Mufan and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Zhang, Yanyong and Chen, Tianlong},
    title     = {Understanding the Role of Hallucination in Reinforcement Post-Training of Multimodal Reasoning Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25326--25335},
}
From Sketch to Fresco: Efficient Diffusion Transformer with Progressive Resolution: Shikang Zheng,

Guantao Chen,

Landis He,

Jiacheng Liu,

Yuqi Lin,

Chang Zou,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Shikang and Chen, Guantao and He, Landis and Liu, Jiacheng and Lin, Yuqi and Zou, Chang and Zhang, Linfeng},
    title     = {From Sketch to Fresco: Efficient Diffusion Transformer with Progressive Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18714--18723},
}
From Pixel to Precision: Enhancing Handwritten Mathematical Expression Recognition with Image-Level Reward: Ze Liu,

Kai Zhang,

Xianquan Wang,

Shuochen Liu,

Jiaxian Yan,

Yupeng Han,

Qi Liu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Ze and Zhang, Kai and Wang, Xianquan and Liu, Shuochen and Yan, Jiaxian and Han, Yupeng and Liu, Qi},
    title     = {From Pixel to Precision: Enhancing Handwritten Mathematical Expression Recognition with Image-Level Reward},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25948--25957},
}
Rethinking Model Selection in VLM Through the Lens of Gromov-Wasserstein Distance: Muyang Li,

Yucheng Liu,

Jianbo Ma,

Elliot Osborne,

Bo Han,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Muyang and Liu, Yucheng and Ma, Jianbo and Osborne, Elliot and Han, Bo and Liu, Tongliang},
    title     = {Rethinking Model Selection in {VLM} Through the Lens of {Gromov-Wasserstein} Distance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17237--17247},
}
TSTM: Temporal Segmentation for Task-relevant Mask in Visual Reinforcement Learning Generalization: Weicheng Du,

Wenjia Meng,

Zhengzhe Zhang,

Yilong Yin,

Xiankai Lu; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
    author    = {Du, Weicheng and Meng, Wenjia and Zhang, Zhengzhe and Yin, Yilong and Lu, Xiankai},
    title     = {{TSTM}: Temporal Segmentation for Task-relevant Mask in Visual Reinforcement Learning Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27999--28009},
}
Ghosts in the Point Clouds: De-glaring LiDAR in the Transient Domain: Avery Gump,

Connor Henley,

Sungjin Cheong,

Akarsh Prabhakara,

Mohit Gupta; [pdf] [supp]
[bibtex]
@InProceedings{Gump_2026_CVPR,
    author    = {Gump, Avery and Henley, Connor and Cheong, Sungjin and Prabhakara, Akarsh and Gupta, Mohit},
    title     = {Ghosts in the Point Clouds: De-glaring {LiDAR} in the Transient Domain},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17174--17183},
}
ALLNet: Multi-task Dense Prediction for Degraded Images: Weiran Wang,

Jialing Wu,

Yaqi Chang,

Gang He,

Li Xu,

Chang Wu,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Weiran and Wu, Jialing and Chang, Yaqi and He, Gang and Xu, Li and Wu, Chang and Li, Yunsong},
    title     = {{ALLNet}: Multi-task Dense Prediction for Degraded Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20421--20432},
}
GMT: Effective Global Framework for Multi-Camera Multi-Target Tracking: Yihao Zhen,

Mingyue Xu,

Qiang Wang,

Baojie Fan,

Jiahua Dong,

Tinghui Zhao,

Huijie Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhen_2026_CVPR,
    author    = {Zhen, Yihao and Xu, Mingyue and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Zhao, Tinghui and Fan, Huijie},
    title     = {{GMT}: Effective Global Framework for Multi-Camera Multi-Target Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28201--28210},
}
Geometric-Photometric Event-based 3D Gaussian Ray Tracing: Kai Kohyama,

Yoshimitsu Aoki,

Guillermo Gallego,

Shintaro Shiba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kohyama_2026_CVPR,
    author    = {Kohyama, Kai and Aoki, Yoshimitsu and Gallego, Guillermo and Shiba, Shintaro},
    title     = {Geometric-Photometric Event-based {3D} {Gaussian} Ray Tracing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22301--22311},
}
Towards Universal Computational Aberration Correction in Photographic Cameras: A Comprehensive Benchmark Analysis: Xiaolong Qian,

Qi Jiang,

Yao Gao,

Lei Sun,

Zhonghua Yi,

Kailun Yang,

Luc Van Gool,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
    author    = {Qian, Xiaolong and Jiang, Qi and Gao, Yao and Sun, Lei and Yi, Zhonghua and Yang, Kailun and Van Gool, Luc and Wang, Kaiwei},
    title     = {Towards Universal Computational Aberration Correction in Photographic Cameras: A Comprehensive Benchmark Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26772--26782},
}
TAlignDiff: Automatic Tooth Alignment assisted by Diffusion-based Transformation Learning: Yunbi Liu,

Enqi Tang,

Shiyu Li,

Hui Shuai,

Lei Ma,

Juncheng Li,

Kuai Yu,

Shu Lou,

Yongchu Pan,

Qingshan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yunbi and Tang, Enqi and Li, Shiyu and Shuai, Hui and Ma, Lei and Li, Juncheng and Yu, Kuai and Lou, Shu and Pan, Yongchu and Liu, Qingshan},
    title     = {{TAlignDiff}: Automatic Tooth Alignment assisted by Diffusion-based Transformation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22784--22793},
}
BA-GS: Bayesian Adaptive Gaussian Splatting for SFM-Free 3D Reconstruction: Zhongjie Ma,

Di Lin,

Xin Wang,

Haotian Dong,

Chong Wang,

Dongdong Wu,

Changqing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Zhongjie and Lin, Di and Wang, Xin and Dong, Haotian and Wang, Chong and Wu, Dongdong and Zhang, Changqing},
    title     = {{BA-GS}: {Bayesian} Adaptive {Gaussian} Splatting for {SFM}-Free {3D} Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26124--26133},
}
VGGT-Segmentor: Geometry-Enhanced Cross-View Segmentation: Yulu Gao,

Bohao Zhang,

Zongheng Tang,

Jitong Liao,

Wenjun Wu,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Yulu and Zhang, Bohao and Tang, Zongheng and Liao, Jitong and Wu, Wenjun and Liu, Si},
    title     = {{VGGT-Segmentor}: Geometry-Enhanced Cross-View Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21690--21700},
}
BuildAnyPoint: 3D Building Structured Abstraction from Diverse Point Clouds: Tongyan Hua,

Haoran Gong,

Yuan Liu,

Di Wang,

Ying-Cong Chen,

Wufan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2026_CVPR,
    author    = {Hua, Tongyan and Gong, Haoran and Liu, Yuan and Wang, Di and Chen, Ying-Cong and Zhao, Wufan},
    title     = {{BuildAnyPoint}: {3D} Building Structured Abstraction from Diverse Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17099--17109},
}
BarbieGait: An Identity-Consistent Synthetic Human Dataset with Versatile Cloth-Changing for Gait Recognition: Qingyuan Cai,

Saihui Hou,

Xuecai Hu,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Qingyuan and Hou, Saihui and Hu, Xuecai and Huang, Yongzhen},
    title     = {{BarbieGait}: An Identity-Consistent Synthetic Human Dataset with Versatile Cloth-Changing for Gait Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28402--28412},
}
VINS-120K: Ultra High-Resolution Image Editing with A Large-Scale Dataset: Zhizhou Chen,

Shanyan Guan,

Zhanxin Gao,

En Ci,

Yanhao Ge,

Wei Li,

Zhenyu Zhang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Zhizhou and Guan, Shanyan and Gao, Zhanxin and Ci, En and Ge, Yanhao and Li, Wei and Zhang, Zhenyu and Yang, Jian and Tai, Ying},
    title     = {{VINS-120K}: Ultra High-Resolution Image Editing with A Large-Scale Dataset},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15302--15312},
}
JANUS: A Lightweight Framework for Jailbreaking Text-to-Image Models via Distribution Optimization: Haolun Zheng,

Yu He,

Tailun Chen,

Shuo Shao,

Zhixuan Chu,

Hongbin Zhou,

Lan Tao,

Zhan Qin,

Kui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Haolun and He, Yu and Chen, Tailun and Shao, Shuo and Chu, Zhixuan and Zhou, Hongbin and Tao, Lan and Qin, Zhan and Ren, Kui},
    title     = {{JANUS}: A Lightweight Framework for Jailbreaking Text-to-Image Models via Distribution Optimization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15719--15729},
}
When Anonymity Breaks: Identifying Models Behind Text-to-Image Leaderboards: Ali Naseh,

Anshuman Suri,

Yuefeng Peng,

Harsh Chaudhari,

Alina Oprea,

Amir Houmansadr; [pdf] [supp]
[bibtex]
@InProceedings{Naseh_2026_CVPR,
    author    = {Naseh, Ali and Suri, Anshuman and Peng, Yuefeng and Chaudhari, Harsh and Oprea, Alina and Houmansadr, Amir},
    title     = {When Anonymity Breaks: Identifying Models Behind Text-to-Image Leaderboards},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24449--24459},
}
PC-Talk: Precise Facial Animation Control for Audio-Driven Talking Face Generation: Baiqin Wang,

Xiangyu Zhu,

Fan Shen,

Hao Xu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Baiqin and Zhu, Xiangyu and Shen, Fan and Xu, Hao and Lei, Zhen},
    title     = {{PC-Talk}: Precise Facial Animation Control for Audio-Driven Talking Face Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25153--25162},
}
CoLoGen: Progressive Learning of Concept-Localization Duality for Unified Image Generation: Yuxin Song,

Yu Lu,

Haoyuan Sun,

Huanjin Yao,

Fanglong Liu,

Yifan Sun,

Haocheng Feng,

Hang Zhou,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Yuxin and Lu, Yu and Sun, Haoyuan and Yao, Huanjin and Liu, Fanglong and Sun, Yifan and Feng, Haocheng and Zhou, Hang and Wang, Jingdong},
    title     = {{CoLoGen}: Progressive Learning of Concept-Localization Duality for Unified Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14724--14734},
}
FSFSplatter: Geometrically Accurate Reconstruction with Free Sparse-view Images within 2 minutes: Yibin Zhao,

Yihan Pan,

Jun Nan,

Liwei Chen,

Jianjun Yi; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Yibin and Pan, Yihan and Nan, Jun and Chen, Liwei and Yi, Jianjun},
    title     = {{FSFSplatter}: Geometrically Accurate Reconstruction with Free Sparse-view Images within 2 minutes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26134--26143},
}
R4Det: 4D Radar-Camera Fusion for High-Performance 3D Object Detection: Zhongyu Xia,

Yousen Tang,

Yongtao Wang,

Zhifeng Wang,

Weijun Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Zhongyu and Tang, Yousen and Wang, Yongtao and Wang, Zhifeng and Qin, Weijun},
    title     = {{R4Det}: {4D} Radar-Camera Fusion for High-Performance {3D} Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18766--18775},
}
When CLIP Sees More, It Fights Back Harder: Multi-View Guided Adaptive Counterattacks for Test-Time Adversarial Robustness: Sunoh Kim,

Daeho Um; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Sunoh and Um, Daeho},
    title     = {When {CLIP} Sees More, It Fights Back Harder: Multi-View Guided Adaptive Counterattacks for Test-Time Adversarial Robustness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15689--15699},
}
LLaDA-MedV: Exploring Large Language Diffusion Models for Biomedical Image Understanding: Xuanzhao Dong,

Wenhui Zhu,

Xiwen Chen,

Zhipeng Wang,

Peijie Qiu,

Shao Tang,

Xin Li,

Yalin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
    author    = {Dong, Xuanzhao and Zhu, Wenhui and Chen, Xiwen and Wang, Zhipeng and Qiu, Peijie and Tang, Shao and Li, Xin and Wang, Yalin},
    title     = {{LLaDA-MedV}: Exploring Large Language Diffusion Models for Biomedical Image Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22773--22783},
}
Fine-Tuning Impairs the Balancedness of Foundation Models in Long-tailed Personalized Federated Learning: Shihao Hou,

Chikai Shang,

Zhiheng Yang,

Jiacheng Yang,

Xinyi Shang,

Junlong Gao,

Yiqun Zhang,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
    author    = {Hou, Shihao and Shang, Chikai and Yang, Zhiheng and Yang, Jiacheng and Shang, Xinyi and Gao, Junlong and Zhang, Yiqun and Lu, Yang},
    title     = {Fine-Tuning Impairs the Balancedness of Foundation Models in Long-tailed Personalized Federated Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17505--17514},
}
Modeling Cross-vision Synergy for Unified Large Vision Model: Shengqiong Wu,

Lanhu Wu,

Mingyang Bao,

Wenhao Xu,

Hanwang Zhang,

Shuicheng Yan,

Hao Fei,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Shengqiong and Wu, Lanhu and Bao, Mingyang and Xu, Wenhao and Zhang, Hanwang and Yan, Shuicheng and Fei, Hao and Chua, Tat-Seng},
    title     = {Modeling Cross-vision Synergy for Unified Large Vision Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22941--22952},
}
Radar-Guided Polynomial Fitting for Metric Depth Estimation: Patrick Rim,

Hyoungseob Park,

Vadim Ezhov,

Jeffrey Moon,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rim_2026_CVPR,
    author    = {Rim, Patrick and Park, Hyoungseob and Ezhov, Vadim and Moon, Jeffrey and Wong, Alex},
    title     = {Radar-Guided Polynomial Fitting for Metric Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26941--26952},
}
A Polynomial Chaos Framework for Causal Discovery in Nonlinear Uncertain Systems: Liang Cao; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
    author    = {Cao, Liang},
    title     = {A Polynomial Chaos Framework for Causal Discovery in Nonlinear Uncertain Systems},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17474--17483},
}
Enhancing Out-of-Distribution Detection with Extended Logit Normalization: Yifan Ding,

Xixi Liu,

Jonas Unger,

Gabriel Eilertsen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
    author    = {Ding, Yifan and Liu, Xixi and Unger, Jonas and Eilertsen, Gabriel},
    title     = {Enhancing Out-of-Distribution Detection with Extended Logit Normalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24823--24832},
}
Towards Robust Multimodal Large Language Models Against Jailbreak Attacks: Ziyi Yin,

Yuanpu Cao,

Han Liu,

Ting Wang,

Jinghui Chen,

Fenglong Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Ziyi and Cao, Yuanpu and Liu, Han and Wang, Ting and Chen, Jinghui and Ma, Fenglong},
    title     = {Towards Robust Multimodal Large Language Models Against Jailbreak Attacks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22847--22856},
}
VideoARM: Agentic Reasoning over Hierarchical Memory for Long-Form Video Understanding: Yufei Yin,

Qianke Meng,

Minghao Chen,

Jiajun Ding,

Zhenwei Shao,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Yufei and Meng, Qianke and Chen, Minghao and Ding, Jiajun and Shao, Zhenwei and Yu, Zhou},
    title     = {{VideoARM}: Agentic Reasoning over Hierarchical Memory for Long-Form Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24042--24051},
}
240FPS Stereo Vision from Monocular Mixed Spikes: Yeliduosi Xiaokaiti,

Yakun Chang,

Yang Bai,

Zhaojun Huang,

Peiqi Duan,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Xiaokaiti_2026_CVPR,
    author    = {Xiaokaiti, Yeliduosi and Chang, Yakun and Bai, Yang and Huang, Zhaojun and Duan, Peiqi and Shi, Boxin},
    title     = {{240FPS} Stereo Vision from Monocular Mixed Spikes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26688--26697},
}
Balanced Dataset Distillation via Modeling Multiple Visual Pattern Distribution: Guanghui Shi,

Xuefeng Liang,

Qixiang Wen; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Guanghui and Liang, Xuefeng and Wen, Qixiang},
    title     = {Balanced Dataset Distillation via Modeling Multiple Visual Pattern Distribution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19634--19643},
}
X-Part: High Fidelity And Structure Coherent Shape Decomposition And Completion: Xinhao Yan,

Jiachen Xu,

Yang Li,

Changfeng Ma,

Yunhan Yang,

Chunshi Wang,

Zibo Zhao,

Zeqiang Lai,

Yunfei Zhao,

Zhuo Chen,

Chunchao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Xinhao and Xu, Jiachen and Li, Yang and Ma, Changfeng and Yang, Yunhan and Wang, Chunshi and Zhao, Zibo and Lai, Zeqiang and Zhao, Yunfei and Chen, Zhuo and Guo, Chunchao},
    title     = {{X-Part}: High Fidelity And Structure Coherent Shape Decomposition And Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27062--27071},
}
FE2E: From Editor to Dense Geometry Estimator: Jiyuan Wang,

Chunyu Lin,

Lei Sun,

Rongying Liu,

Lang Nie,

Mingxing Li,

Kang Liao,

Xiangxiang Chu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Jiyuan and Lin, Chunyu and Sun, Lei and Liu, Rongying and Nie, Lang and Li, Mingxing and Liao, Kang and Chu, Xiangxiang},
    title     = {{FE2E}: From Editor to Dense Geometry Estimator},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19844--19853},
}
Adaptive Confidence Regularization for Multimodal Failure Detection: Moru Liu,

Hao Dong,

Olga Fink,

Mario Trapp; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Moru and Dong, Hao and Fink, Olga and Trapp, Mario},
    title     = {Adaptive Confidence Regularization for Multimodal Failure Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15850--15859},
}
LoST: Level of Semantics Tokenization for 3D Shapes: Niladri Shekhar Dutt,

Zifan Shi,

Paul Guerrero,

Chun-Hao Paul Huang,

Duygu Ceylan,

Niloy J. Mitra,

Xuelin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dutt_2026_CVPR,
    author    = {Dutt, Niladri Shekhar and Shi, Zifan and Guerrero, Paul and Huang, Chun-Hao Paul and Ceylan, Duygu and Mitra, Niloy J. and Chen, Xuelin},
    title     = {{LoST}: Level of Semantics Tokenization for {3D} Shapes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19950--19959},
}
Differentiable Stroke Planning with Dual Parameterization for Efficient and High-Fidelity Painting Creation: Jinfan Liu,

Wuze Zhang,

Zhangli Hu,

Zhehan Zhao,

Ye Chen,

Bingbing Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jinfan and Zhang, Wuze and Hu, Zhangli and Zhao, Zhehan and Chen, Ye and Ni, Bingbing},
    title     = {Differentiable Stroke Planning with Dual Parameterization for Efficient and High-Fidelity Painting Creation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26721--26730},
}
Adaptive Depth Lightweight RGB-T Tracking with Holistic Token Routing: Tian Ding,

Hongtao Yang,

Liangtao Shi,

Jun Li,

Xiantao Hu,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2026_CVPR,
    author    = {Ding, Tian and Yang, Hongtao and Shi, Liangtao and Li, Jun and Hu, Xiantao and Yang, Jian and Tai, Ying},
    title     = {Adaptive Depth Lightweight {RGB-T} Tracking with Holistic Token Routing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20942--20952},
}
Learning to Generate Highly Dynamic Videos using Synthetic Motion Data: Wonjoon Jin,

Jiyun Won,

Janghyeok Han,

Qi Dai,

Chong Luo,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Wonjoon and Won, Jiyun and Han, Janghyeok and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun},
    title     = {Learning to Generate Highly Dynamic Videos using Synthetic Motion Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18331--18341},
}
ProgTrack: A Multi-Object Tracking Algorithm with Progressive Matching Strategy: Chenhui Zhang,

Guoqing Dong,

Weijie Peng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Chenhui and Dong, Guoqing and Peng, Weijie},
    title     = {{ProgTrack}: A Multi-Object Tracking Algorithm with Progressive Matching Strategy},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20902--20911},
}
MicroFM: Physics-guided Flow Matching for Isotropic Microscopy Reconstruction: Xingzu Zhan,

Runmin Jiang,

Vatsal Gupta,

Tanush Swaminathan,

Yanwen Wang,

Genpei Zhang,

Haili Wang,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
    author    = {Zhan, Xingzu and Jiang, Runmin and Gupta, Vatsal and Swaminathan, Tanush and Wang, Yanwen and Zhang, Genpei and Wang, Haili and Xu, Min},
    title     = {{MicroFM}: Physics-guided Flow Matching for Isotropic Microscopy Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15639--15648},
}
HyperGaussians: High-Dimensional Gaussian Splatting for High-Fidelity Animatable Face Avatars: Gent Serifi,

Marcel C. Buehler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Serifi_2026_CVPR,
    author    = {Serifi, Gent and Buehler, Marcel C.},
    title     = {{HyperGaussians}: High-Dimensional {Gaussian} Splatting for High-Fidelity Animatable Face Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25236--25247},
}
FocusUI: Efficient UI Grounding via Position-Preserving Visual Token Selection: Mingyu Ouyang,

Kevin Qinghong Lin,

Mike Zheng Shou,

Hwee Tou Ng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
    author    = {Ouyang, Mingyu and Lin, Kevin Qinghong and Shou, Mike Zheng and Ng, Hwee Tou},
    title     = {{FocusUI}: Efficient {UI} Grounding via Position-Preserving Visual Token Selection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20313--20323},
}
ExMesh: EXplicit Mesh Reconstruction with Topology Adaptation: Chuanjin Fan,

Lifan Wu,

Wenjie Chang,

Hanzhi Chang,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Chuanjin and Wu, Lifan and Chang, Wenjie and Chang, Hanzhi and Yang, Wenfei and Zhang, Tianzhu},
    title     = {{ExMesh}: {EXplicit} Mesh Reconstruction with Topology Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27136--27145},
}
Mesh4D: 4D Mesh Reconstruction and Tracking from Monocular Video: Zeren Jiang,

Chuanxia Zheng,

Iro Laina,

Diane Larlus,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea},
    title     = {{Mesh4D}: {4D} Mesh Reconstruction and Tracking from Monocular Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14524--14535},
}
Conflict-Aware Adaptive Cross-Reconstruction for Multimodal Sentiment Analysis: Yan Wang,

Fuyuan Cao,

Xingwang Zhao; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yan and Cao, Fuyuan and Zhao, Xingwang},
    title     = {Conflict-Aware Adaptive Cross-Reconstruction for Multimodal Sentiment Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15880--15889},
}
Neurodynamics-Driven Coupled Neural P Systems for Multi-Focus Image Fusion: Bo Li,

Yunkuo Lei,

Tingting Bao,

Hang Yan,

Yaxian Wang,

Weiping Fu,

Lingling Zhang,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Bo and Lei, Yunkuo and Bao, Tingting and Yan, Hang and Wang, Yaxian and Fu, Weiping and Zhang, Lingling and Liu, Jun},
    title     = {Neurodynamics-Driven Coupled Neural {P} Systems for Multi-Focus Image Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26541--26550},
}
EchoPOSE: 6D Pose Estimation of Sparse Echocardiograms for Left-Ventricular 3D Shape Reconstruction: Lucas Iijima,

Yihao Luo,

Dario Sesia,

Amit Kaura,

Jamil Mayet,

Choon Hwai Yap; [pdf] [supp]
[bibtex]
@InProceedings{Iijima_2026_CVPR,
    author    = {Iijima, Lucas and Luo, Yihao and Sesia, Dario and Kaura, Amit and Mayet, Jamil and Yap, Choon Hwai},
    title     = {{EchoPOSE}: {6D} Pose Estimation of Sparse Echocardiograms for Left-Ventricular {3D} Shape Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22753--22762},
}
FlexAvatar: Learning Complete 3D Head Avatars with Partial Supervision: Tobias Kirschstein,

Simon Giebenhain,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Kirschstein_2026_CVPR,
    author    = {Kirschstein, Tobias and Giebenhain, Simon and Nie{\ss}ner, Matthias},
    title     = {{FlexAvatar}: Learning Complete {3D} Head Avatars with Partial Supervision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18193--18203},
}
SurgCoT: Advancing Spatiotemporal Reasoning in Surgical Videos through a Chain-of-Thought Benchmark: Gui Wang,

YongSong Zhou,

Kaijun Deng,

Wooi Ping Cheah,

Rong Qu,

Jianfeng Ren,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Gui and Zhou, YongSong and Deng, Kaijun and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin},
    title     = {{SurgCoT}: Advancing Spatiotemporal Reasoning in Surgical Videos through a Chain-of-Thought Benchmark},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17011--17021},
}
S$^2$AM3D: Scale-controllable Part Segmentation of 3D Point Clouds: Han Su,

Tianyu Huang,

Zichen Wan,

Xiaohe Wu,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Han and Huang, Tianyu and Wan, Zichen and Wu, Xiaohe and Zuo, Wangmeng},
    title     = {{S$^{2}$AM3D}: Scale-controllable Part Segmentation of {3D} Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14357--14366},
}
V^2-SAM: Marrying SAM2 with Multi-Prompt Experts for Cross-View Object Correspondence: Jiancheng Pan,

Runze Wang,

Tianwen Qian,

Mohammad Mahdi,

Yanwei Fu,

Xiangyang Xue,

Xiaomeng Huang,

Luc Van Gool,

Danda Pani Paudel,

Yuqian Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Jiancheng and Wang, Runze and Qian, Tianwen and Mahdi, Mohammad and Fu, Yanwei and Xue, Xiangyang and Huang, Xiaomeng and Van Gool, Luc and Paudel, Danda Pani and Fu, Yuqian},
    title     = {{V$^{2}$-SAM}: Marrying {SAM2} with Multi-Prompt Experts for Cross-View Object Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16910--16919},
}
LumiX: Structured and Coherent Text-to-Intrinsic Generation: Xu Han,

Biao Zhang,

Xiangjun Tang,

Xianzhi Li,

Peter Wonka; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Xu and Zhang, Biao and Tang, Xiangjun and Li, Xianzhi and Wonka, Peter},
    title     = {{LumiX}: Structured and Coherent Text-to-Intrinsic Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21942--21952},
}
Audio-sync Video Instance Editing with Granularity-Aware Mask Refiner: Haojie Zheng,

Shuchen Weng,

Jingqi Liu,

Siqi Yang,

Boxin Shi,

Xinlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Haojie and Weng, Shuchen and Liu, Jingqi and Yang, Siqi and Shi, Boxin and Wang, Xinlong},
    title     = {Audio-sync Video Instance Editing with Granularity-Aware Mask Refiner},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23150--23160},
}
SCE-Depth: A Spherical Compound Eye Framework for Wide FOV Depth Estimation: Yi Zhu,

Hao Xiong,

Lin Xiao,

Ranfeng Shi,

Qinying Gu,

Leilei Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yi and Xiong, Hao and Xiao, Lin and Shi, Ranfeng and Gu, Qinying and Gu, Leilei},
    title     = {{SCE-Depth}: A Spherical Compound Eye Framework for Wide {FOV} Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26964--26973},
}
CHAL: Causal-guided Hierarchical Anomaly-aware Learning for Moving Infrared Small Target Detection: Weiwei Duan,

Luping Ji,

Shipeng Lei,

Sicheng Zhu,

Jianghong Huang,

Mao Ye; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
    author    = {Duan, Weiwei and Ji, Luping and Lei, Shipeng and Zhu, Sicheng and Huang, Jianghong and Ye, Mao},
    title     = {{CHAL}: Causal-guided Hierarchical Anomaly-aware Learning for Moving Infrared Small Target Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21357--21366},
}
History to Future: Evolving Agent with Experience and Thought for Zero-shot Vision-and-Language Navigation: Guangzhao Dai,

Shuo Wang,

Zihan Wang,

Guo-Sen Xie,

Yang Yang,

Jinshan Pan,

Qianru Sun,

Xiangbo Shu; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
    author    = {Dai, Guangzhao and Wang, Shuo and Wang, Zihan and Xie, Guo-Sen and Yang, Yang and Pan, Jinshan and Sun, Qianru and Shu, Xiangbo},
    title     = {History to Future: Evolving Agent with Experience and Thought for Zero-shot Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15177--15187},
}
Learning Spatial-Temporal Consistency for 3D Semantic Scene Completion: Yujie Xue,

Meng Wang,

Ruihui Li,

Fan Wu,

Zhizhong Liu,

Zhuo Tang,

Kenli Li; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Yujie and Wang, Meng and Li, Ruihui and Wu, Fan and Liu, Zhizhong and Tang, Zhuo and Li, Kenli},
    title     = {Learning Spatial-Temporal Consistency for {3D} Semantic Scene Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28567--28577},
}
Deformable Gaussian Occupancy: Decoupling Rigid and Nonrigid Motion with Factorized Distillation: Yang Gao,

Wuyang Li,

Po-Chien Luan,

Alexandre Alahi; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Yang and Li, Wuyang and Luan, Po-Chien and Alahi, Alexandre},
    title     = {Deformable {Gaussian} Occupancy: Decoupling Rigid and Nonrigid Motion with Factorized Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28588--28598},
}
Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models: Christian Simon,

Masato Ishii,

Wei-Yao Wang,

Koichi Saito,

Akio Hayakawa,

Dongseok Shim,

Zhi Zhong,

Shuyang Cui,

Takashi Shibuya,

Shusuke Takahashi,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Simon_2026_CVPR,
    author    = {Simon, Christian and Ishii, Masato and Wang, Wei-Yao and Saito, Koichi and Hayakawa, Akio and Shim, Dongseok and Zhong, Zhi and Cui, Shuyang and Shibuya, Takashi and Takahashi, Shusuke and Mitsufuji, Yuki},
    title     = {Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15840--15849},
}
Flowception: Temporally Expansive Flow Matching for Video Generation: Tariq Berrada Ifriqi,

John Nguyen,

Karteek Alahari,

Jakob Verbeek,

Ricky T. Q. Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ifriqi_2026_CVPR,
    author    = {Ifriqi, Tariq Berrada and Nguyen, John and Alahari, Karteek and Verbeek, Jakob and Chen, Ricky T. Q.},
    title     = {Flowception: Temporally Expansive Flow Matching for Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16185--16195},
}
DeRVOS: Decoupling Consistent Trajectory Generation and Multimodal Understanding for Referring Video Object Segmentation: Wenxuan Cheng,

Ming Dai,

Huimin Lu,

Wankou Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Wenxuan and Dai, Ming and Lu, Huimin and Yang, Wankou},
    title     = {{DeRVOS}: Decoupling Consistent Trajectory Generation and Multimodal Understanding for Referring Video Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24651--24662},
}
Spatial Retrieval Augmented Autonomous Driving: Xiaosong Jia,

Chenhe Zhang,

Yule Jiang,

Songbur Wong,

Zhiyuan Zhang,

Chen Chen,

Shaofeng Zhang,

Xuanhe Zhou,

Xue Yang,

Junchi Yan,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Xiaosong and Zhang, Chenhe and Jiang, Yule and Wong, Songbur and Zhang, Zhiyuan and Chen, Chen and Zhang, Shaofeng and Zhou, Xuanhe and Yang, Xue and Yan, Junchi and Jiang, Yu-Gang},
    title     = {Spatial Retrieval Augmented Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17787--17797},
}
Decompose, Mix, Adapt: A Unified Framework for Parameter-Efficient Neural Network Recombination and Compression: Nazia Tasnim,

Shrimai Prabhumoye,

Bryan A. Plummer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tasnim_2026_CVPR,
    author    = {Tasnim, Nazia and Prabhumoye, Shrimai and Plummer, Bryan A.},
    title     = {Decompose, Mix, Adapt: A Unified Framework for Parameter-Efficient Neural Network Recombination and Compression},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19380--19392},
}
Text-Driven 3D Hand Motion Generation from Sign Language Data: Léore Bensabath,

Mathis Petrovich,

Gül Varol; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bensabath_2026_CVPR,
    author    = {Bensabath, L{\'e}ore and Petrovich, Mathis and Varol, G{\"u}l},
    title     = {Text-Driven {3D} Hand Motion Generation from Sign Language Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23095--23105},
}
Steering Where to Diffuse: Generative Modeling of Phenotypic Response Simulation with Steered Diffusion Bridge: Rongchao Zhang,

Chengxin Li,

Yiwei Lou,

Yuling Shi,

Hanpin Wang,

Yu Huang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Rongchao and Li, Chengxin and Lou, Yiwei and Shi, Yuling and Wang, Hanpin and Huang, Yu},
    title     = {Steering Where to Diffuse: Generative Modeling of Phenotypic Response Simulation with Steered Diffusion Bridge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27367--27377},
}
EduDiag: A Benchmark for Educational Diagnostic Reasoning with Error Tracing and Correction on Large Multimodal Models: Jiali Chen,

Yuqi Xue,

Xusen Hei,

DingBa Fu,

Yuancheng Wei,

Jiayuan Xie,

Yi Cai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Jiali and Xue, Yuqi and Hei, Xusen and Fu, DingBa and Wei, Yuancheng and Xie, Jiayuan and Cai, Yi},
    title     = {{EduDiag}: A Benchmark for Educational Diagnostic Reasoning with Error Tracing and Correction on Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15890--15901},
}
ColaVLA: Leveraging Cognitive Latent Reasoning for Hierarchical Parallel Trajectory Planning in Autonomous Driving: Qihang Peng,

Xuesong Chen,

Chenye Yang,

Shaoshuai Shi,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Qihang and Chen, Xuesong and Yang, Chenye and Shi, Shaoshuai and Li, Hongsheng},
  title     = {{ColaVLA}: Leveraging Cognitive Latent Reasoning for Hierarchical Parallel Trajectory Planning in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17809--17819},
}
UniPixie: Unified and Probabilistic 3D Physics Learning via Flow Matching: Qilin Huang,

Quynh Anh Huynh,

Long Le,

Chen Wang,

Chuhao Chen,

Ryan Lucas,

Eric Eaton,

Lingjie Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Qilin and Huynh, Quynh Anh and Le, Long and Wang, Chen and Chen, Chuhao and Lucas, Ryan and Eaton, Eric and Liu, Lingjie},
  title     = {{UniPixie}: Unified and Probabilistic {3D} Physics Learning via Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19907--19916},
}
$L^{2}DGS$: Low-Light Dynamic Gaussian Splatting: Ashish Kumar,

Rajagopalan N Ambasamduram; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashish and Ambasamduram, Rajagopalan N},
  title     = {{$L^{2}DGS$}: Low-Light Dynamic {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19096--19106},
}
Granulon: Awakening Pixel-Level Visual Encoders with Adaptive Multi-Granularity Semantics for MLLM: Junyuan Mao,

Qiankun Li,

Linghao Meng,

Zhicheng He,

Xinliang Zhou,

Kun Wang,

Yang Liu,

Yueming Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Junyuan and Li, Qiankun and Meng, Linghao and He, Zhicheng and Zhou, Xinliang and Wang, Kun and Liu, Yang and Jin, Yueming},
  title     = {{Granulon}: Awakening Pixel-Level Visual Encoders with Adaptive Multi-Granularity Semantics for {MLLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26317--26327},
}
AtomicVLA: Unlocking the Potential of Atomic Skill Learning in Robots: Likui Zhang,

Tao Tang,

Zhihao Zhan,

Xiuwei Chen,

Zisheng Chen,

Jianhua Han,

Jiangtong Zhu,

Pei Xu,

Hang Xu,

Hefeng Wu,

Liang Lin,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Likui and Tang, Tao and Zhan, Zhihao and Chen, Xiuwei and Chen, Zisheng and Han, Jianhua and Zhu, Jiangtong and Xu, Pei and Xu, Hang and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{AtomicVLA}: Unlocking the Potential of Atomic Skill Learning in Robots},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20743--20754},
}
GaussianFluent: Gaussian Simulation for Dynamic Scenes with Mixed Materials: Bei Huang,

Yixin Chen,

Ruijie Lu,

Gang Zeng,

Hongbin Zha,

Yuru Pei,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Bei and Chen, Yixin and Lu, Ruijie and Zeng, Gang and Zha, Hongbin and Pei, Yuru and Huang, Siyuan},
  title     = {{GaussianFluent}: {Gaussian} Simulation for Dynamic Scenes with Mixed Materials},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21583--21593},
}
Native and Compact Structured Latents for 3D Generation: Jianfeng Xiang,

Xiaoxue Chen,

Sicheng Xu,

Ruicheng Wang,

Zelong Lv,

Yu Deng,

Hongyuan Zhu,

Yue Dong,

Hao Zhao,

Nicholas Jing Yuan,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Jianfeng and Chen, Xiaoxue and Xu, Sicheng and Wang, Ruicheng and Lv, Zelong and Deng, Yu and Zhu, Hongyuan and Dong, Yue and Zhao, Hao and Yuan, Nicholas Jing and Yang, Jiaolong},
  title     = {Native and Compact Structured Latents for {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14419--14429},
}
UFO: Unifying Feed-Forward and Optimization-based Methods for Large Driving Scene Modeling: Kaiyuan Tan,

Yingying Shen,

Ziyue Zhu,

Mingfei Tu,

Haohui Zhu,

Haiyang Sun,

Bing Wang,

Guang Chen,

Hangjun Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Kaiyuan and Shen, Yingying and Zhu, Ziyue and Tu, Mingfei and Zhu, Haohui and Sun, Haiyang and Wang, Bing and Chen, Guang and Ye, Hangjun},
  title     = {{UFO}: Unifying Feed-Forward and Optimization-based Methods for Large Driving Scene Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21849--21859},
}
XSeg: A Large-scale X-ray Contraband Segmentation Benchmark For Real-World Security Screening: Hongxia Gao,

Yixin Chen,

Jiali Wen,

Litao Li,

Qianyun Liu,

Kaijie Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hongxia and Chen, Yixin and Wen, Jiali and Li, Litao and Liu, Qianyun and Zhang, Kaijie},
  title     = {{XSeg}: A Large-scale {X-ray} Contraband Segmentation Benchmark For Real-World Security Screening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24950--24959},
}
Zoo3D: Zero-Shot 3D Object Detection at Scene Level: Andrey Lemeshko,

Bulat Gabdullin,

Nikita Drozdov,

Anton Konushin,

Danila Rukhovich,

Maksim Kolodiazhnyi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lemeshko_2026_CVPR,
  author    = {Lemeshko, Andrey and Gabdullin, Bulat and Drozdov, Nikita and Konushin, Anton and Rukhovich, Danila and Kolodiazhnyi, Maksim},
  title     = {{Zoo3D}: Zero-Shot {3D} Object Detection at Scene Level},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25820--25829},
}
FilterGS: Traversal-Free Parallel Filtering and Adaptive Shrinking for Large-Scale LoD 3D Gaussian Splatting: Yixian Wang,

Haolin Yu,

Jiadong Tang,

Yu Gao,

Xihan Wang,

Yufeng Yue,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yixian and Yu, Haolin and Tang, Jiadong and Gao, Yu and Wang, Xihan and Yue, Yufeng and Yang, Yi},
  title     = {{FilterGS}: Traversal-Free Parallel Filtering and Adaptive Shrinking for Large-Scale {LoD} {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26052--26061},
}
Unsupervised Multi-Scale Segmentation of 3D Subcellular World with Stable Diffusion Foundation Model: Mostofa Rafid Uddin,

HM Shadman Tabib,

Thanh-Huy Nguyen,

Kashish Gandhi,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Uddin_2026_CVPR,
  author    = {Uddin, Mostofa Rafid and Tabib, HM Shadman and Nguyen, Thanh-Huy and Gandhi, Kashish and Xu, Min},
  title     = {Unsupervised Multi-Scale Segmentation of {3D} Subcellular World with {Stable Diffusion} Foundation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22744--22752},
}
LiDAR-to-4DRadar Diffusion Bridge via Cross-Modal Alignment and Translation in Latent Space: Dazhong Shen,

Jingjing Gu,

Qiang Zhou,

Meng Zhao,

Ying Sun; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Dazhong and Gu, Jingjing and Zhou, Qiang and Zhao, Meng and Sun, Ying},
  title     = {{LiDAR-to-4DRadar} Diffusion Bridge via Cross-Modal Alignment and Translation in Latent Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17110--17120},
}
Reparameterized Tensor Ring Functional Decomposition for Multi-Dimensional Data Recovery: Yangyang Xu,

Junbo Ke,

You-Wei Wen,

Chao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yangyang and Ke, Junbo and Wen, You-Wei and Wang, Chao},
  title     = {Reparameterized Tensor Ring Functional Decomposition for Multi-Dimensional Data Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26389--26398},
}
Semi-Supervised Conformal Prediction With Unlabeled Nonconformity Score: Xuanning Zhou,

Zihao Shi,

Hao Zeng,

Xiaobo Xia,

Bingyi Jing,

Hongxin Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xuanning and Shi, Zihao and Zeng, Hao and Xia, Xiaobo and Jing, Bingyi and Wei, Hongxin},
  title     = {Semi-Supervised Conformal Prediction With Unlabeled Nonconformity Score},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17704--17713},
}
Test-Time Training for LiDAR Semantic Segmentation under Corruption via Geometric Inlier Discrimination: Hyeonseong Kim,

Hyun-Kurl Jang,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyeonseong and Jang, Hyun-Kurl and Yoon, Kuk-Jin},
  title     = {Test-Time Training for {LiDAR} Semantic Segmentation under Corruption via Geometric Inlier Discrimination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24206--24216},
}
Thinking Beyond Labels: Vocabulary-Free Fine-Grained Recognition using Reasoning-Augmented LMMs: Dmitry Demidov,

Muhammad Zaigham Zaheer,

Zongyan Han,

Omkar Thawakar,

Rao Anwer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Demidov_2026_CVPR,
  author    = {Demidov, Dmitry and Zaheer, Muhammad Zaigham and Han, Zongyan and Thawakar, Omkar and Anwer, Rao},
  title     = {Thinking Beyond Labels: Vocabulary-Free Fine-Grained Recognition using Reasoning-Augmented {LMMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16855--16864},
}
SinGeo: Unlock Single Model's Potential for Robust Cross-View Geo-Localization: Yang Chen,

Xieyuanli Chen,

Junxiang Li,

Jie Tang,

Tao Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yang and Chen, Xieyuanli and Li, Junxiang and Tang, Jie and Wu, Tao},
  title     = {{SinGeo}: Unlock Single Model's Potential for Robust Cross-View Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19403--19412},
}
JarvisEvo: Towards a Self-Evolving Photo Editing Agent with Synergistic Editor-Evaluator Optimization: Yunlong Lin,

Linqing Wang,

Kunjie Lin,

Zixu Lin,

Kaixiong Gong,

Wenbo Li,

Bin Lin,

Zhenxi Li,

Shiyi Zhang,

Yuyang Peng,

Wenxun Dai,

Xinghao Ding,

Chunyu Wang,

Qinglin Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yunlong and Wang, Linqing and Lin, Kunjie and Lin, Zixu and Gong, Kaixiong and Li, Wenbo and Lin, Bin and Li, Zhenxi and Zhang, Shiyi and Peng, Yuyang and Dai, Wenxun and Ding, Xinghao and Wang, Chunyu and Lu, Qinglin},
  title     = {{JarvisEvo}: Towards a Self-Evolving Photo Editing Agent with Synergistic Editor-Evaluator Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27291--27302},
}
SyncDreamer: Controllable and Expressive Avatar Generation Beyond the Talking Head: Fatemeh Nazarieh,

Zhenhua Feng,

Diptesh Kanojia,

Josef Kittler,

Muhammad Awais; [pdf] [supp]
[bibtex]
@InProceedings{Nazarieh_2026_CVPR,
  author    = {Nazarieh, Fatemeh and Feng, Zhenhua and Kanojia, Diptesh and Kittler, Josef and Awais, Muhammad},
  title     = {{SyncDreamer}: Controllable and Expressive Avatar Generation Beyond the Talking Head},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25121--25130},
}
SIMPACT: Simulation-Enabled Action Planning using Vision-Language Models: Haowen Liu,

Shaoxiong Yao,

Haonan Chen,

Jiawei Gao,

Jiayuan Mao,

Jia-Bin Huang,

Yilun Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haowen and Yao, Shaoxiong and Chen, Haonan and Gao, Jiawei and Mao, Jiayuan and Huang, Jia-Bin and Du, Yilun},
  title     = {{SIMPACT}: Simulation-Enabled Action Planning using Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20790--20801},
}
What's Wrong with Synthetic Data for Scene Text Recognition? A Strong Synthetic Engine with Diverse Simulations and Self-Evolution: Xingsong Ye,

Yongkun Du,

JiaXin Zhang,

Chen Li,

Jing LYU,

Zhineng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xingsong and Du, Yongkun and Zhang, JiaXin and Li, Chen and LYU, Jing and Chen, Zhineng},
  title     = {What's Wrong with Synthetic Data for Scene Text Recognition? {A} Strong Synthetic Engine with Diverse Simulations and Self-Evolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16645--16654},
}
Fed-ADE: Adaptive Learning Rate for Federated Post-adaptation under Distribution Shift: Heewon Park,

Mugon Joe,

Miru Kim,

Kyungjin Im,

Minhae Kwon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Heewon and Joe, Mugon and Kim, Miru and Im, Kyungjin and Kwon, Minhae},
  title     = {{Fed-ADE}: Adaptive Learning Rate for Federated Post-adaptation under Distribution Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24587--24597},
}
PhaSR: Generalized Image Shadow Removal with Physically Aligned Priors: Chia-Ming Lee,

Yu-Fan Lin,

Yu-Jou Hsiao,

Jin-Hui Jiang,

Yu-Lun Liu,

Chih-Chung Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chia-Ming and Lin, Yu-Fan and Hsiao, Yu-Jou and Jiang, Jin-Hui and Liu, Yu-Lun and Hsu, Chih-Chung},
  title     = {{PhaSR}: Generalized Image Shadow Removal with Physically Aligned Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22679--22688},
}
H-Sets: Hessian-Guided Discovery of Set-Level Feature Interactions in Image Classifiers: Ayushi Mehrotra,

Dipkamal Bhusal,

Michael Clifford,

Nidhi Rastogi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehrotra_2026_CVPR,
  author    = {Mehrotra, Ayushi and Bhusal, Dipkamal and Clifford, Michael and Rastogi, Nidhi},
  title     = {{H-Sets}: {Hessian}-Guided Discovery of Set-Level Feature Interactions in Image Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17067--17076},
}
GenBreak: Red Teaming Text-to-Image Generation Using Large Language Models: Zilong Wang,

Xiang Zheng,

Xiaosen Wang,

Bo Wang,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zilong and Zheng, Xiang and Wang, Xiaosen and Wang, Bo and Ma, Xingjun},
  title     = {{GenBreak}: Red Teaming Text-to-Image Generation Using Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15730--15739},
}
OmniFM: Toward Modality-Robust and Task-Agnostic Federated Learning for Heterogeneous Medical Imaging: Meilin Liu,

Jiaying Wang,

Jing Shan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Meilin and Wang, Jiaying and Shan, Jing},
  title     = {{OmniFM}: Toward Modality-Robust and Task-Agnostic Federated Learning for Heterogeneous Medical Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21099--21109},
}
Orthogonal Spatial-Aware Multi-View Anchor Graph Clustering for Incomplete Remote Sensing Data: Yongshan Zhang,

Xiaohuan Lin,

Lefei Zhang,

Zhihua Cai; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yongshan and Lin, Xiaohuan and Zhang, Lefei and Cai, Zhihua},
  title     = {Orthogonal Spatial-Aware Multi-View Anchor Graph Clustering for Incomplete Remote Sensing Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20532--20541},
}
Training-free Detection of Generated Videos via Spatial-Temporal Likelihoods: Omer Ben Hayun,

Roy Betser,

Meir Yossef Levi,

Levi Kassel,

Guy Gilboa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ben_Hayun_2026_CVPR,
  author    = {Ben Hayun, Omer and Betser, Roy and Levi, Meir Yossef and Kassel, Levi and Gilboa, Guy},
  title     = {Training-free Detection of Generated Videos via Spatial-Temporal Likelihoods},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16299--16310},
}
PRISM: Video Dataset Condensation with Progressive Refinement and Insertion for Sparse Motion: Jaehyun Choi,

Jiwan Hur,

Gyojin Han,

Jaemyung Yu,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaehyun and Hur, Jiwan and Han, Gyojin and Yu, Jaemyung and Kim, Junmo},
  title     = {{PRISM}: Video Dataset Condensation with Progressive Refinement and Insertion for Sparse Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26348--26357},
}
FlowHijack: A Dynamics-Aware Backdoor Attack on Flow-Matching Vision-Language-Action Models: Xinyuan An,

Tao Luo,

Gengyun Peng,

Yaobing Wang,

Kui Ren,

Dongxia Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Xinyuan and Luo, Tao and Peng, Gengyun and Wang, Yaobing and Ren, Kui and Wang, Dongxia},
  title     = {{FlowHijack}: A Dynamics-Aware Backdoor Attack on Flow-Matching Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22879--22888},
}
How Far Can We Go With Synthetic Data for Audio-Visual Sound Source Localization?: Arda Senocak,

Sooyoung Park,

Tae-Hyun Oh,

Joon Son Chung; [pdf] [supp]
[bibtex]
@InProceedings{Senocak_2026_CVPR,
  author    = {Senocak, Arda and Park, Sooyoung and Oh, Tae-Hyun and Chung, Joon Son},
  title     = {How Far Can We Go With Synthetic Data for Audio-Visual Sound Source Localization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22931--22940},
}
AMusE: Audio-Visual Benchmark and Alignment Framework for Agentic Multi-Speaker Understanding: Sanjoy Chowdhury,

Karren D Yang,

Xudong Liu,

Fartash Faghri,

Pavan Kumar Anasosalu Vasu,

Oncel Tuzel,

Dinesh Manocha,

Chun-Liang Li,

Raviteja Vemulapalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2026_CVPR,
  author    = {Chowdhury, Sanjoy and Yang, Karren D and Liu, Xudong and Faghri, Fartash and Vasu, Pavan Kumar Anasosalu and Tuzel, Oncel and Manocha, Dinesh and Li, Chun-Liang and Vemulapalli, Raviteja},
  title     = {{AMusE}: Audio-Visual Benchmark and Alignment Framework for Agentic Multi-Speaker Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22998--23009},
}
Global-Graph Guided and Local-Graph Weighted Contrastive Learning for Unified Clustering on Incomplete and Noise Multi-View Data: Hongqing He,

Jie Xu,

Wenyuan Yang,

Yonghua Zhu,

Guoqiu Wen,

Xiaofeng Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Hongqing and Xu, Jie and Yang, Wenyuan and Zhu, Yonghua and Wen, Guoqiu and Zhu, Xiaofeng},
  title     = {Global-Graph Guided and Local-Graph Weighted Contrastive Learning for Unified Clustering on Incomplete and Noise Multi-View Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24813--24822},
}
GeoDiT: A Diffusion-based Vision-Language Model for Geospatial Understanding: Jiaqi Liu,

Ronghao Fu,

Haoran Liu,

Lang Sun,

Qipeng Wang,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiaqi and Fu, Ronghao and Liu, Haoran and Sun, Lang and Wang, Qipeng and Yang, Bo},
  title     = {{GeoDiT}: A Diffusion-based Vision-Language Model for Geospatial Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20608--20618},
}
CaTok: Taming Mean Flows for One-Dimensional Causal Image Tokenization: Yitong Chen,

Zuxuan Wu,

Xipeng Qiu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yitong and Wu, Zuxuan and Qiu, Xipeng and Jiang, Yu-Gang},
  title     = {{CaTok}: Taming Mean Flows for One-Dimensional Causal Image Tokenization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23161--23171},
}
TrajRAG: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation: Yiyao Wang,

Sixian Zhang,

Keming Zhang,

Xinhang Song,

Songjie Du,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yiyao and Zhang, Sixian and Zhang, Keming and Song, Xinhang and Du, Songjie and Jiang, Shuqiang},
  title     = {{TrajRAG}: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15166--15176},
}
Bridging Brain and Semantics: A Hierarchical Framework for Semantically Enhanced fMRI-to-Video Reconstruction: Yujie Wei,

Chenglong Ma,

Jianxiong Gao,

Chenhui Wang,

Shiwei Zhang,

Biao Gong,

Shuai Tan,

Hangjie Yuan,

Hongming Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yujie and Ma, Chenglong and Gao, Jianxiong and Wang, Chenhui and Zhang, Shiwei and Gong, Biao and Tan, Shuai and Yuan, Hangjie and Shan, Hongming},
  title     = {Bridging Brain and Semantics: A Hierarchical Framework for Semantically Enhanced {fMRI}-to-Video Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28211--28223},
}
Gallant: Voxel Grid-based Humanoid Locomotion and Local-navigation across 3-D Constrained Terrains: Qingwei Ben,

Botian Xu,

Kailin Li,

Feiyu Jia,

Wentao Zhang,

Jingping Wang,

Jingbo Wang,

Dahua Lin,

Jiangmiao Pang; [pdf] [supp]
[bibtex]
@InProceedings{Ben_2026_CVPR,
  author    = {Ben, Qingwei and Xu, Botian and Li, Kailin and Jia, Feiyu and Zhang, Wentao and Wang, Jingping and Wang, Jingbo and Lin, Dahua and Pang, Jiangmiao},
  title     = {{Gallant}: Voxel Grid-based Humanoid Locomotion and Local-navigation across {3-D} Constrained Terrains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28086--28095},
}
UniM: A Unified Any-to-Any Interleaved Multimodal Benchmark: Yanlin Li,

Minghui Guo,

Kaiwen Zhang,

Shize Zhang,

Yiran Zhao,

Haodong Li,

Congyue Zhou,

Weijie Zheng,

Yushen Yan,

Shengqiong Wu,

Wei Ji,

Lei Cui,

Furu Wei,

Hao Fei,

Mong-Li Lee,

Wynne Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanlin and Guo, Minghui and Zhang, Kaiwen and Zhang, Shize and Zhao, Yiran and Li, Haodong and Zhou, Congyue and Zheng, Weijie and Yan, Yushen and Wu, Shengqiong and Ji, Wei and Cui, Lei and Wei, Furu and Fei, Hao and Lee, Mong-Li and Hsu, Wynne},
  title     = {{UniM}: A Unified Any-to-Any Interleaved Multimodal Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15902--15911},
}
A Closer Look at Cross-Domain Few-Shot Object Detection: Fine-Tuning Matters and Parallel Decoder Helps: Xuanlong Yu,

Youyang Sha,

Longfei Liu,

Xi Shen,

Di Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xuanlong and Sha, Youyang and Liu, Longfei and Shen, Xi and Yang, Di},
  title     = {A Closer Look at Cross-Domain Few-Shot Object Detection: Fine-Tuning Matters and Parallel Decoder Helps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26593--26603},
}
Spe-BEVHead: Rethinking the Detection Head Design for Bird's-Eye-View Object Detection: Junshu Zhang,

Sicheng Zhao,

Xin Zhao,

Fan Yang,

Ruike Chen,

Jungong Han,

Guiguang Ding; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junshu and Zhao, Sicheng and Zhao, Xin and Yang, Fan and Chen, Ruike and Han, Jungong and Ding, Guiguang},
  title     = {{Spe-BEVHead}: Rethinking the Detection Head Design for Bird's-Eye-View Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25799--25809},
}
MVP: Multiple View Prediction Improves GUI Grounding: Yunzhu Zhang,

Zeyu Pan,

Zhengwen Zeng,

Shuheng Shen,

Changhua Meng,

Linchao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yunzhu and Pan, Zeyu and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Zhu, Linchao},
  title     = {{MVP}: Multiple View Prediction Improves {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27482--27492},
}
Towards GUI Agents: Vision-Language Diffusion Models for GUI Grounding: Shrinidhi Kumbhar,

Haofu Liao,

Srikar Appalaraju,

Kunwar Yashraj Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumbhar_2026_CVPR,
  author    = {Kumbhar, Shrinidhi and Liao, Haofu and Appalaraju, Srikar and Singh, Kunwar Yashraj},
  title     = {Towards {GUI} Agents: Vision-Language Diffusion Models for {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27493--27502},
}
Efficient Hybrid SE(3)-Equivariant Visuomotor Flow Policy via Spherical Harmonics for Robot Manipulation: Qinglun Zhang,

Shen Cheng,

Tian Dan,

Haoqiang Fan,

Guanghui Liu,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qinglun and Cheng, Shen and Dan, Tian and Fan, Haoqiang and Liu, Guanghui and Liu, Shuaicheng},
  title     = {Efficient Hybrid {SE(3)}-Equivariant Visuomotor Flow Policy via Spherical Harmonics for Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27989--27998},
}
PAD-Hand: Physics-Aware Diffusion for Hand Motion Recovery: Elkhan Ismayilzada,

Yufei Zhang,

Zijun Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ismayilzada_2026_CVPR,
  author    = {Ismayilzada, Elkhan and Zhang, Yufei and Cui, Zijun},
  title     = {{PAD-Hand}: Physics-Aware Diffusion for Hand Motion Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28358--28368},
}
Scaling Agentic Reinforcement Learning for Tool-Integrated Reasoning in VLMs: Meng Lu,

Ran Xu,

Yi Fang,

Wenxuan Zhang,

Yue Yu,

Gaurav Srivastava,

Yuchen Zhuang,

Mohamed Elhoseiny,

Charles Fleming,

Carl Yang,

Zhengzhong Tu,

Yang Xie,

Guanghua Xiao,

Di Jin,

Wenqi Shi,

Xuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Meng and Xu, Ran and Fang, Yi and Zhang, Wenxuan and Yu, Yue and Srivastava, Gaurav and Zhuang, Yuchen and Elhoseiny, Mohamed and Fleming, Charles and Yang, Carl and Tu, Zhengzhong and Xie, Yang and Xiao, Guanghua and Jin, Di and Shi, Wenqi and Wang, Xuan},
  title     = {Scaling Agentic Reinforcement Learning for Tool-Integrated Reasoning in {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26518--26529},
}
Unleashing VLA Potentials in Autonomous Driving via Explicit Learning from Failures: Yuechen Luo,

Fang Li,

Qimao Chen,

Shaoqing Xu,

Jiaxin Liu,

Ziying Song,

Zhi-xin Yang,

Fuxi Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yuechen and Li, Fang and Chen, Qimao and Xu, Shaoqing and Liu, Jiaxin and Song, Ziying and Yang, Zhi-xin and Wen, Fuxi},
  title     = {Unleashing {VLA} Potentials in Autonomous Driving via Explicit Learning from Failures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24833--24842},
}
Rethinking Visual Rearrangement from A Diffusion Perspective: Tianliang Qi,

Xinhang Song,

Yuyi Liu,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Tianliang and Song, Xinhang and Liu, Yuyi and Jiang, Shuqiang},
  title     = {Rethinking Visual Rearrangement from A Diffusion Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15222--15231},
}
ClimaOoD: Improving Anomaly Segmentation via Physically Realistic Synthetic Data: Yuxing Liu,

Zheng Li,

Huanhuan Liang,

Ji Zhang,

Zeyu Sun,

Yong Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxing and Li, Zheng and Liang, Huanhuan and Zhang, Ji and Sun, Zeyu and Liu, Yong},
  title     = {{ClimaOoD}: Improving Anomaly Segmentation via Physically Realistic Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17853--17862},
}
Multi-View Hierarchical Alignment Learning for Spatial Transcriptomics: Zhengzhong Zhu,

Liangjin Liu,

Pei Zhou,

Shiquan Min,

Jiangping Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengzhong and Liu, Liangjin and Zhou, Pei and Min, Shiquan and Zhu, Jiangping},
  title     = {Multi-View Hierarchical Alignment Learning for Spatial Transcriptomics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26783--26792},
}
Action-Sketcher: From Reasoning to Action via Visual Sketches for Robotic Manipulation: Huajie Tan,

Peterson Co,

Yijie Xu,

Shanyu Rong,

Yuheng Ji,

Cheng Chi,

Xiansheng Chen,

Zhongxia Zhao,

Pengwei Wang,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Huajie and Co, Peterson and Xu, Yijie and Rong, Shanyu and Ji, Yuheng and Chi, Cheng and Chen, Xiansheng and Zhao, Zhongxia and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {{Action-Sketcher}: From Reasoning to Action via Visual Sketches for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22433--22444},
}
FAPE-IR: Frequency-Aware Planning and Execution Framework for All-in-One Image Restoration: Jingren Liu,

Shuning Xu,

Qirui Yang,

Yun Wang,

Xiangyu Chen,

Zhong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jingren and Xu, Shuning and Yang, Qirui and Wang, Yun and Chen, Xiangyu and Ji, Zhong},
  title     = {{FAPE-IR}: Frequency-Aware Planning and Execution Framework for All-in-One Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15561--15573},
}
Asking like Socrates: Socrates helps VLMs understand remote sensing images: Run Shao,

Ziyu Li,

Zhaoyang Zhang,

Linrui Xu,

Xinran He,

Hongyuan Yuan,

Bolei He,

Yongxing Dai,

Yiming Yan,

Yijun Chen,

Wang Guo,

Haifeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Run and Li, Ziyu and Zhang, Zhaoyang and Xu, Linrui and He, Xinran and Yuan, Hongyuan and He, Bolei and Dai, Yongxing and Yan, Yiming and Chen, Yijun and Guo, Wang and Li, Haifeng},
  title     = {Asking like {Socrates}: {Socrates} helps {VLMs} understand remote sensing images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26465--26475},
}
EgoSound: Benchmarking Sound Understanding in Egocentric Videos: Bingwen Zhu,

Yuqian Fu,

Qiaole Dong,

Guolei Sun,

Tianwen Qian,

Yuzheng Wu,

Danda Pani Paudel,

Yanwei Fu,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Bingwen and Fu, Yuqian and Dong, Qiaole and Sun, Guolei and Qian, Tianwen and Wu, Yuzheng and Paudel, Danda Pani and Fu, Yanwei and Xue, Xiangyang},
  title     = {{EgoSound}: Benchmarking Sound Understanding in Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25589--25598},
}
Test-Time Attention Purification for Backdoored Large Vision Language Models: Zhifang Zhang,

Bojun Yang,

Shuo He,

Weitong Chen,

Wei Emma Zhang,

Olaf Maennel,

Lei Feng,

Miao Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhifang and Yang, Bojun and He, Shuo and Chen, Weitong and Zhang, Wei Emma and Maennel, Olaf and Feng, Lei and Xu, Miao},
  title     = {Test-Time Attention Purification for Backdoored Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22826--22835},
}
SCoRe: Salience-Coverage Reduction for Vision Token Pruning in Vision-Language Models: Tong Xu,

Hailong Shi,

Xingyu Gao; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Tong and Shi, Hailong and Gao, Xingyu},
  title     = {{SCoRe}: Salience-Coverage Reduction for Vision Token Pruning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24686--24695},
}
Cross-Modal Guided Visual Synthesis for Data-Efficient Multimodal Depression Recognition: Shanliang Yang,

Xiaoxiao Wang; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Shanliang and Wang, Xiaoxiao},
  title     = {Cross-Modal Guided Visual Synthesis for Data-Efficient Multimodal Depression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15933--15943},
}
Perceiving the Near, Reasoning the Distant: Coherent Long-Horizon Trajectory Prediction for Autonomous Driving: Hua Hu,

Zikang Zhou,

Qian Zhou,

Zihao Wen,

Junjie Hu,

Xinhong Chen,

Zhengmin Jiang,

Yung-Hui Li,

Jianping Wang; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Hua and Zhou, Zikang and Zhou, Qian and Wen, Zihao and Hu, Junjie and Chen, Xinhong and Jiang, Zhengmin and Li, Yung-Hui and Wang, Jianping},
  title     = {Perceiving the Near, Reasoning the Distant: Coherent Long-Horizon Trajectory Prediction for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24875--24884},
}
RoboAgent: Chaining Basic Capabilities for Embodied Task Planning: Peiran Xu,

Jiaqi Zheng,

Yadong Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Peiran and Zheng, Jiaqi and Mu, Yadong},
  title     = {{RoboAgent}: Chaining Basic Capabilities for Embodied Task Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15276--15290},
}
Unsupervised Monocular 3D Keypoint Discovery from Multi-View Diffusion Priors: Subin Jeon,

In Cho,

Junyoung Hong,

Woong Oh Cho,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Subin and Cho, In and Hong, Junyoung and Cho, Woong Oh and Kim, Seon Joo},
  title     = {Unsupervised Monocular {3D} Keypoint Discovery from Multi-View Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17132--17142},
}
URScenes: A Multi-scenario Dataset for Unstructured Road Environments: Runsen Liu,

Aizemaitijiang Baoerhan,

Zhangyu Wang,

Jie Wang,

Jinghao Cui,

Guizhen Yu,

Songyue Yang,

WanCheng Sun,

Mingjun Tang,

Zhanbo Hua,

Wenwen Luo; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Runsen and Baoerhan, Aizemaitijiang and Wang, Zhangyu and Wang, Jie and Cui, Jinghao and Yu, Guizhen and Yang, Songyue and Sun, WanCheng and Tang, Mingjun and Hua, Zhanbo and Luo, Wenwen},
  title     = {{URScenes}: A Multi-scenario Dataset for Unstructured Road Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17874--17883},
}
From Indoor to Open World: Revealing the Spatial Reasoning Gap in MLLMs: Mingrui Wu,

Zhaozhi Wang,

Fangjinhua Wang,

Jiaolong Yang,

Marc Pollefeys,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mingrui and Wang, Zhaozhi and Wang, Fangjinhua and Yang, Jiaolong and Pollefeys, Marc and Zhang, Tong},
  title     = {From Indoor to Open World: Revealing the Spatial Reasoning Gap in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16789--16799},
}
MARSS: Radar Semantic Segmentation via Modular Attention and State Space Models: Fengyu Chen,

Tiao Tan,

Teng Li,

Yuantian Quan,

Qingmin Liao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Fengyu and Tan, Tiao and Li, Teng and Quan, Yuantian and Liao, Qingmin},
  title     = {{MARSS}: Radar Semantic Segmentation via Modular Attention and State Space Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17959--17968},
}
Towards Generalizable AI-Generated Image Detection via Image-Adaptive Prompt Learning: Yiheng Li,

Zichang Tan,

Guoqing Xu,

Zhen Lei,

Xu Zhou,

Yang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiheng and Tan, Zichang and Xu, Guoqing and Lei, Zhen and Zhou, Xu and Yang, Yang},
  title     = {Towards Generalizable {AI}-Generated Image Detection via Image-Adaptive Prompt Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21262--21272},
}
WAM-Flow: Parallel Coarse-to-Fine Motion Planning via Discrete Flow Matching for Autonomous Driving: Yifang Xu,

Jiahao Cui,

Zhihao Zhu,

Hanlin Shang,

Shan Luan,

Mingwang Xu,

Feipeng Cai,

Neng Zhang,

Yaoyi Li,

Jia Cai,

Siyu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yifang and Cui, Jiahao and Zhu, Zhihao and Shang, Hanlin and Luan, Shan and Xu, Mingwang and Cai, Feipeng and Zhang, Neng and Li, Yaoyi and Cai, Jia and Zhu, Siyu},
  title     = {{WAM-Flow}: Parallel Coarse-to-Fine Motion Planning via Discrete Flow Matching for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24918--24928},
}
Universal-to-Specific: Dynamic Knowledge-Guided Multiple Instance Learning for Few-Shot Whole Slide Image Classification: Junjian Li,

Hulin Kuang,

Jin Liu,

Hailin Yue,

Mengshen He,

Jianxin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junjian and Kuang, Hulin and Liu, Jin and Yue, Hailin and He, Mengshen and Wang, Jianxin},
  title     = {Universal-to-Specific: Dynamic Knowledge-Guided Multiple Instance Learning for Few-Shot Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26614--26623},
}
Boosting Vision-Language-Action Finetuning with Feasible Action Neighborhood Prior: Haochen Niu,

Kanyu Zhang,

Shuyu Yin,

Qinghai Guo,

Peilin Liu,

Fei Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Haochen and Zhang, Kanyu and Yin, Shuyu and Guo, Qinghai and Liu, Peilin and Wen, Fei},
  title     = {Boosting Vision-Language-Action Finetuning with Feasible Action Neighborhood Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27956--27966},
}
VideoITG: Multimodal Video Understanding with Instructed Temporal Grounding: Shihao Wang,

Guo Chen,

De-An Huang,

Zhiqi Li,

Minghan Li,

Guilin Liu,

Jan Kautz,

Jose M. Alvarez,

Lei Zhang,

Zhiding Yu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shihao and Chen, Guo and Huang, De-An and Li, Zhiqi and Li, Minghan and Liu, Guilin and Kautz, Jan and Alvarez, Jose M. and Zhang, Lei and Yu, Zhiding},
  title     = {{VideoITG}: Multimodal Video Understanding with Instructed Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24640--24650},
}
KAMP: Knowledge-Anchored Multimodal Pretraining Framework for Medical Image Representation: Feiyu Huang,

Jia Li,

Zhao Chen,

Yang Wu,

Caleb Chen Cao,

Lei Chen; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Feiyu and Li, Jia and Chen, Zhao and Wu, Yang and Cao, Caleb Chen and Chen, Lei},
  title     = {{KAMP}: Knowledge-Anchored Multimodal Pretraining Framework for Medical Image Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21067--21077},
}
Saliency-R1: Enforcing Interpretable and Faithful Vision-language Reasoning via Saliency-map Alignment Reward: Shizhan Gong,

Minda Hu,

Qiyuan Zhang,

Chen Ma,

Qi Dou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Shizhan and Hu, Minda and Zhang, Qiyuan and Ma, Chen and Dou, Qi},
  title     = {{Saliency-R1}: Enforcing Interpretable and Faithful Vision-language Reasoning via Saliency-map Alignment Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24095--24106},
}
Open-Vocabulary Domain Generalization in Urban-Scene Segmentation: Dong Zhao,

Qi Zang,

Nan Pu,

Wenjing Li,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Dong and Zang, Qi and Pu, Nan and Li, Wenjing and Sebe, Nicu and Zhong, Zhun},
  title     = {Open-Vocabulary Domain Generalization in Urban-Scene Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20388--20398},
}
HiFi-BRep: High-Fidelity Latent Representation for Robust B-Rep Generation: Junhao Hou,

Chenqi Luo,

Pufan Wang,

Jiaying Lu,

Yusheng Liu,

Feiwei Qin,

Meie Fang,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Junhao and Luo, Chenqi and Wang, Pufan and Lu, Jiaying and Liu, Yusheng and Qin, Feiwei and Fang, Meie and Zhou, Kun},
  title     = {{HiFi-BRep}: High-Fidelity Latent Representation for Robust {B-Rep} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27199--27208},
}
Text-Printed Image: Bridging the Image-Text Modality Gap for Text-centric Training of Large Vision-Language Models: Shojiro Yamabe,

Futa Waseda,

Daiki Shiono,

Tsubasa Takahashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamabe_2026_CVPR,
  author    = {Yamabe, Shojiro and Waseda, Futa and Shiono, Daiki and Takahashi, Tsubasa},
  title     = {Text-Printed Image: Bridging the Image-Text Modality Gap for Text-centric Training of Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17270--17281},
}
HBridge: H-Shape Bridging of Heterogeneous Experts for Unified Multimodal Understanding and Generation: Xiang Wang,

Zhifei Zhang,

He Zhang,

Zhe Lin,

Yuqian Zhou,

Qing Liu,

Shiwei Zhang,

Yijun Li,

Shaoteng Liu,

Haitian Zheng,

Jason Kuen,

Yuehuan Wang,

Changxin Gao,

Nong Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiang and Zhang, Zhifei and Zhang, He and Lin, Zhe and Zhou, Yuqian and Liu, Qing and Zhang, Shiwei and Li, Yijun and Liu, Shaoteng and Zheng, Haitian and Kuen, Jason and Wang, Yuehuan and Gao, Changxin and Sang, Nong},
  title     = {{HBridge}: {H}-Shape Bridging of Heterogeneous Experts for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14767--14778},
}
Unified Number-Free Text-to-Motion Generation Via Flow Matching: Guanhe Huang,

Oya Celiktutan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Guanhe and Celiktutan, Oya},
  title     = {Unified Number-Free Text-to-Motion Generation Via Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23570--23580},
}
VQ-VA World: Towards High-Quality Visual Question-Visual Answering: Chenhui Gou,

Zilong Chen,

Zeyu Wang,

Feng Li,

Deyao Zhu,

Zicheng Duan,

Kunchang Li,

Chaorui Deng,

Hongyi Yuan,

Haoqi Fan,

Cihang Xie,

Jianfei Cai,

Hamid Rezatofighi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gou_2026_CVPR,
  author    = {Gou, Chenhui and Chen, Zilong and Wang, Zeyu and Li, Feng and Zhu, Deyao and Duan, Zicheng and Li, Kunchang and Deng, Chaorui and Yuan, Hongyi and Fan, Haoqi and Xie, Cihang and Cai, Jianfei and Rezatofighi, Hamid},
  title     = {{VQ-VA} World: Towards High-Quality Visual Question-Visual Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18837--18847},
}
From Contrast to Consistency: Rethinking Event-based Continuous-Time Optical Flow Estimation: Rui Hu,

Song Wu,

Wen Yang,

Jinjian Wu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Rui and Wu, Song and Yang, Wen and Wu, Jinjian},
  title     = {From Contrast to Consistency: Rethinking Event-based Continuous-Time Optical Flow Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15125--15134},
}
ChordEdit: One-Step Low-Energy Transport for Image Editing: Liangsi Lu,

Xuhang Chen,

Minzhe Guo,

Shichu Li,

Jingchao Wang,

Yang Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Liangsi and Chen, Xuhang and Guo, Minzhe and Li, Shichu and Wang, Jingchao and Shi, Yang},
  title     = {{ChordEdit}: One-Step Low-Energy Transport for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14398--14407},
}
MagicFuse: Single Image Fusion for Visual and Semantic Reinforcement: Hao Zhang,

Yanping Zha,

Zizhuo Li,

Meiqi Gong,

Jiayi Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hao and Zha, Yanping and Li, Zizhuo and Gong, Meiqi and Ma, Jiayi},
  title     = {{MagicFuse}: Single Image Fusion for Visual and Semantic Reinforcement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26551--26560},
}
RobotSeg: A Model and Dataset for Segmenting Robots in Image and Video: Haiyang Mei,

Qiming Huang,

Hai Ci,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2026_CVPR,
  author    = {Mei, Haiyang and Huang, Qiming and Ci, Hai and Shou, Mike Zheng},
  title     = {{RobotSeg}: A Model and Dataset for Segmenting Robots in Image and Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14346--14356},
}
Self-supervised Dynamic Heterogeneous Degradation Modeling for Unified Zero-Shot Image Restoration: XiaoWan Hu,

Jing Yang,

HeNan Liu,

HuaQiu Li,

Mai Xu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, XiaoWan and Yang, Jing and Liu, HeNan and Li, HuaQiu and Xu, Mai},
  title     = {Self-supervised Dynamic Heterogeneous Degradation Modeling for Unified Zero-Shot Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22658--22668},
}
Learning to Learn Weight Generation via Local Consistency Diffusion: Yunchuan Guan,

Yu Liu,

Ke Zhou,

Zhiqi Shen,

Jenq-Neng Hwang,

Lei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Yunchuan and Liu, Yu and Zhou, Ke and Shen, Zhiqi and Hwang, Jenq-Neng and Li, Lei},
  title     = {Learning to Learn Weight Generation via Local Consistency Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19623--19633},
}
3D-LATTE: Latent Space 3D Editing from Textual Instructions: Maria Parelli,

Michael Oechsle,

Michael Niemeyer,

Federico Tombari,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parelli_2026_CVPR,
  author    = {Parelli, Maria and Oechsle, Michael and Niemeyer, Michael and Tombari, Federico and Geiger, Andreas},
  title     = {{3D-LATTE}: Latent Space {3D} Editing from Textual Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14377--14386},
}
NI-Tex: Non-isometric Image-based Garment Texture Generation: Hui Shan,

Ming Li,

Haitao Yang,

Kai Zheng,

Sizhe Zheng,

Yanwei Fu,

Xiangru Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Hui and Li, Ming and Yang, Haitao and Zheng, Kai and Zheng, Sizhe and Fu, Yanwei and Huang, Xiangru},
  title     = {{NI-Tex}: Non-isometric Image-based Garment Texture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19884--19893},
}
Active Intelligence in Video Avatars via Closed-loop World Modeling: Xuanhua He,

Tianyu Yang,

Ke Cao,

Ruiqi Wu,

Cheng Meng,

Yong Zhang,

Zhuoliang Kang,

Xiaoming Wei,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xuanhua and Yang, Tianyu and Cao, Ke and Wu, Ruiqi and Meng, Cheng and Zhang, Yong and Kang, Zhuoliang and Wei, Xiaoming and Chen, Qifeng},
  title     = {Active Intelligence in Video Avatars via Closed-loop World Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27239--27248},
}
Generative Diffusion Priors for 3D Mapping of the Dark Universe: Brandon Zhao,

Diana Scognamiglio,

Olivier Doré,

Katherine L. Bouman; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Brandon and Scognamiglio, Diana and Dor{\'e}, Olivier and Bouman, Katherine L.},
  title     = {Generative Diffusion Priors for {3D} Mapping of the Dark Universe},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23581--23590},
}
PlanaReLoc: Camera Relocalization in 3D Planar Primitives via Region-Based Structure Matching: Hanqiao Ye,

Yuzhou Liu,

Yangdong Liu,

Shuhan Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Hanqiao and Liu, Yuzhou and Liu, Yangdong and Shen, Shuhan},
  title     = {{PlanaReLoc}: Camera Relocalization in {3D} Planar Primitives via Region-Based Structure Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26409--26421},
}
Generalizable Knowledge Distillation from Vision Foundation Models for Semantic Segmentation: Chonghua Lv,

Dong Zhao,

Shuang Wang,

Dou Quan,

Ning Huyan,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Chonghua and Zhao, Dong and Wang, Shuang and Quan, Dou and Huyan, Ning and Sebe, Nicu and Zhong, Zhun},
  title     = {Generalizable Knowledge Distillation from Vision Foundation Models for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26656--26666},
}
Coverage Optimization for Camera View Selection: Timothy Chen,

Adam Dai,

Maximilian Adang,

Grace Gao,

Mac Schwager; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Timothy and Dai, Adam and Adang, Maximilian and Gao, Grace and Schwager, Mac},
  title     = {Coverage Optimization for Camera View Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19443--19451},
}
StableMaterials: Enhancing Diversity in Material Generation via Semi-Supervised Learning: Giuseppe Vecchio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vecchio_2026_CVPR,
  author    = {Vecchio, Giuseppe},
  title     = {{StableMaterials}: Enhancing Diversity in Material Generation via Semi-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19665--19675},
}
MEMO: Human-like Crisp Edge Detection Using Masked Edge Prediction: Jiaxin Cheng,

Yue Wu,

Yicong Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Jiaxin and Wu, Yue and Zhou, Yicong},
  title     = {{MEMO}: Human-like Crisp Edge Detection Using Masked Edge Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27740--27749},
}
UniSER: A Foundation Model for Unified Soft Effects Removal: Jingdong Zhang,

Lingzhi Zhang,

Qing Liu,

Mang Tik Chiu,

Connelly Barnes,

Yizhou Wang,

Haoran You,

Xiaoyang Liu,

Yuqian Zhou,

Zhe Lin,

Eli Shechtman,

Sohrab Amirghodsi,

Xin Li,

Wenping Wang,

Xiaohang Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jingdong and Zhang, Lingzhi and Liu, Qing and Chiu, Mang Tik and Barnes, Connelly and Wang, Yizhou and You, Haoran and Liu, Xiaoyang and Zhou, Yuqian and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Li, Xin and Wang, Wenping and Zhan, Xiaohang},
  title     = {{UniSER}: A Foundation Model for Unified Soft Effects Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16096--16107},
}
Efficient Frame Selection for Long Video Understanding via Reinforcement Learning: Yaxuan Qin,

Hefei Li,

Wenqi Mu,

Yancheng He; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yaxuan and Li, Hefei and Mu, Wenqi and He, Yancheng},
  title     = {Efficient Frame Selection for Long Video Understanding via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16944--16953},
}
PIX-TAB: Efficient PIXel-Precise TABle Structure Recognition Approach with Speculative Decoding and Region-Based Image Segmentation: Viktor Zaytsev,

Olena Vynokurova,

Pavlo Tytarchuk,

Dmytro Kozii,

Vitalii Pohribnyi,

Olga Radyvonenko,

Artem Shcherbina; [pdf] [supp]
[bibtex]
@InProceedings{Zaytsev_2026_CVPR,
  author    = {Zaytsev, Viktor and Vynokurova, Olena and Tytarchuk, Pavlo and Kozii, Dmytro and Pohribnyi, Vitalii and Radyvonenko, Olga and Shcherbina, Artem},
  title     = {{PIX-TAB}: Efficient {PIXel-Precise} {TABle} Structure Recognition Approach with Speculative Decoding and Region-Based Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23912--23921},
}
Any2Any 3D Diffusion Models with Knowledge Transfer: A Radiotherapy Planning Study: Yuhan Wang,

Zihan Li,

Han Liu,

Simon Arberet,

Martin Kraus,

Yuyin Zhou,

Florin-Cristian Ghesu,

Dorin Comaniciu,

Ali Kamen,

Riqiang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuhan and Li, Zihan and Liu, Han and Arberet, Simon and Kraus, Martin and Zhou, Yuyin and Ghesu, Florin-Cristian and Comaniciu, Dorin and Kamen, Ali and Gao, Riqiang},
  title     = {{Any2Any} {3D} Diffusion Models with Knowledge Transfer: A Radiotherapy Planning Study},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16520--16530},
}
VLM-PTQ: Efficient Post-Training Quantization for Large Vision-Language Models: Juncan Deng,

Kejie Huang; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Juncan and Huang, Kejie},
  title     = {{VLM-PTQ}: Efficient Post-Training Quantization for Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24696--24705},
}
PointCNN++: Performant Convolution on Native Points: Lihan Li,

Haofeng Zhong,

Rui Bu,

Mingchao Sun,

Wenzheng Chen,

Baoquan Chen,

Yangyan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lihan and Zhong, Haofeng and Bu, Rui and Sun, Mingchao and Chen, Wenzheng and Chen, Baoquan and Li, Yangyan},
  title     = {{PointCNN++}: Performant Convolution on Native Points},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24151--24161},
}
High-Fidelity Diffusion Face Swapping with ID-Constrained Facial Conditioning: Dailan He,

Xiahong Wang,

Shulun Wang,

Hao Shao,

Bingqi Ma,

Guanglu Song,

Yu Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Dailan and Wang, Xiahong and Wang, Shulun and Shao, Hao and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng},
  title     = {High-Fidelity Diffusion Face Swapping with {ID}-Constrained Facial Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25767--25776},
}
Weakly Supervised Video Anomaly Detection with Anomaly-Connected Components and Intention Reasoning: Yu Wang,

Shengjie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yu and Zhao, Shengjie},
  title     = {Weakly Supervised Video Anomaly Detection with Anomaly-Connected Components and Intention Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28545--28556},
}
CSF: Black-box Fingerprinting via Compositional Semantics for Text-to-Image Models: Junhoo Lee,

Mijin Koo,

Nojun Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junhoo and Koo, Mijin and Kwak, Nojun},
  title     = {{CSF}: Black-box Fingerprinting via Compositional Semantics for Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16572--16582},
}
Vision Transformers Need More Than Registers: Cheng Shi,

Yizhou Yu,

Sibei Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Cheng and Yu, Yizhou and Yang, Sibei},
  title     = {Vision Transformers Need More Than Registers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26328--26337},
}
Prompt Yourself: Awakening Textual Semantics in 1D Visual Tokenizers: Hualiang Wang,

Siming Fu,

Weinan Jia,

Yuning Lu,

Mu Liu,

Jidong Jiang,

Xiaomeng Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hualiang and Fu, Siming and Jia, Weinan and Lu, Yuning and Liu, Mu and Jiang, Jidong and Li, Xiaomeng},
  title     = {Prompt Yourself: Awakening Textual Semantics in {1D} Visual Tokenizers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14864--14874},
}
Distilling Balanced Knowledge from a Biased Teacher: Seonghak Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seonghak},
  title     = {Distilling Balanced Knowledge from a Biased Teacher},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18032--18041},
}
Parameterized Prompt for Incremental Object Detection: Zijia An,

Boyu Diao,

Ruiqi Liu,

Libo Huang,

Chuanguang Yang,

Fei Wang,

Zhulin An,

Yongjun Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Zijia and Diao, Boyu and Liu, Ruiqi and Huang, Libo and Yang, Chuanguang and Wang, Fei and An, Zhulin and Xu, Yongjun},
  title     = {Parameterized Prompt for Incremental Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27600--27610},
}
PixelDiT: Pixel Diffusion Transformers for Image Generation: Yongsheng Yu,

Wei Xiong,

Weili Nie,

Yichen Sheng,

Shiqiu Liu,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yongsheng and Xiong, Wei and Nie, Weili and Sheng, Yichen and Liu, Shiqiu and Luo, Jiebo},
  title     = {{PixelDiT}: Pixel Diffusion Transformers for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14273--14282},
}
BrepGaussian: CAD reconstruction from Multi-View Images with Gaussian Splatting: Jiaxing Yu,

Dongyang Ren,

Hangyu Xu,

Zhouyuxiao Yang,

Yuanqi Li,

Jie Guo,

Zhengkang Zhou,

Yanwen Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Jiaxing and Ren, Dongyang and Xu, Hangyu and Yang, Zhouyuxiao and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen},
  title     = {{BrepGaussian}: {CAD} reconstruction from Multi-View Images with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26104--26113},
}
HiF-VLA: Hindsight, Insight and Foresight through Motion Representation for Vision-Language-Action Models: Minghui Lin,

Pengxiang Ding,

Shu Wang,

Zifeng Zhuang,

Yang Liu,

Xinyang Tong,

Wenxuan Song,

Shangke Lyu,

Siteng Huang,

Donglin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Minghui and Ding, Pengxiang and Wang, Shu and Zhuang, Zifeng and Liu, Yang and Tong, Xinyang and Song, Wenxuan and Lyu, Shangke and Huang, Siteng and Wang, Donglin},
  title     = {{HiF-VLA}: Hindsight, Insight and Foresight through Motion Representation for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20732--20742}
}
VLM4RSDet: Collaborative Optimization with Vision-Language Model for Enhancing Remote Sensing Object Detection: Shuohao Shi,

Qiang Fang,

Xin Xu; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Shuohao and Fang, Qiang and Xu, Xin},
  title     = {{VLM4RSDet}: Collaborative Optimization with Vision-Language Model for Enhancing Remote Sensing Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18450--18460}
}
Global Structure-from-Motion Meets Feedforward Reconstruction: Linfei Pan,

Johannes Schönberger,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Linfei and Sch{\"o}nberger, Johannes and Pollefeys, Marc},
  title     = {Global Structure-from-Motion Meets Feedforward Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21880--21890}
}
TUNA: Taming Unified Visual Representations for Native Unified Multimodal Models: Zhiheng Liu,

Weiming Ren,

Haozhe Liu,

Zijian Zhou,

Shoufa Chen,

Haonan Qiu,

Xiaoke Huang,

Zhaochong An,

Fanny Yang,

Aditya Patel,

Viktar Atliha,

Tony Ng,

Xiao Han,

Chuyan Zhu,

Chenyang Zhang,

Ding Liu,

Juan-Manuel Perez-Rua,

Sen He,

Jürgen Schmidhuber,

Wenhu Chen,

Ping Luo,

Wei Liu,

Tao Xiang,

Jonas Schult,

Yuren Cong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhiheng and Ren, Weiming and Liu, Haozhe and Zhou, Zijian and Chen, Shoufa and Qiu, Haonan and Huang, Xiaoke and An, Zhaochong and Yang, Fanny and Patel, Aditya and Atliha, Viktar and Ng, Tony and Han, Xiao and Zhu, Chuyan and Zhang, Chenyang and Liu, Ding and Perez-Rua, Juan-Manuel and He, Sen and Schmidhuber, J{\"u}rgen and Chen, Wenhu and Luo, Ping and Liu, Wei and Xiang, Tao and Schult, Jonas and Cong, Yuren},
  title     = {{TUNA}: Taming Unified Visual Representations for Native Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15740--15751}
}
Compositional Transformation Reasoning for Composed Video Retrieval: Sihong Huang,

Jiaxin Wu,

Dongmei Jiang,

Yi Cai,

Yaowei Wang,

Xiaoyong Wei; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Sihong and Wu, Jiaxin and Jiang, Dongmei and Cai, Yi and Wang, Yaowei and Wei, Xiaoyong},
  title     = {Compositional Transformation Reasoning for Composed Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25644--25653}
}
IGen: Scalable Data Generation for Robot Learning from Open-World Images: Chenghao Gu,

Haolan Kang,

Junchao Lin,

Jinghe Wang,

Duo Wu,

Shuzhao Xie,

Fanding Huang,

Junchen Ge,

Ziyang Gong,

Letian Li,

Hongying Zheng,

Changwei Lv,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Chenghao and Kang, Haolan and Lin, Junchao and Wang, Jinghe and Wu, Duo and Xie, Shuzhao and Huang, Fanding and Ge, Junchen and Gong, Ziyang and Li, Letian and Zheng, Hongying and Lv, Changwei and Wang, Zhi},
  title     = {{IGen}: Scalable Data Generation for Robot Learning from Open-World Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28111--28122}
}
Weight Space Representation Learning via Neural Field Adaptation: Zhuoqian Yang,

Mathieu Salzmann,

Sabine Süsstrunk; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhuoqian and Salzmann, Mathieu and S{\"u}sstrunk, Sabine},
  title     = {Weight Space Representation Learning via Neural Field Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17734--17743}
}
Bringing Your Portrait to 3D Presence: Jiawei Zhang,

Lei Chu,

Jiahao Li,

Zhenyu Zang,

Chong Li,

Xiao Li,

Xun Cao,

Hao Zhu,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiawei and Chu, Lei and Li, Jiahao and Zang, Zhenyu and Li, Chong and Li, Xiao and Cao, Xun and Zhu, Hao and Lu, Yan},
  title     = {Bringing Your Portrait to {3D} Presence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28468--28480}
}
OpenMMReasoner: Pushing the Frontiers in Multimodal Reasoning with an Open and General Recipe: Kaichen Zhang,

Keming Wu,

Zuhao Yang,

Bo Li,

Kairui Hu,

Bin Wang,

Xingxuan Li,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaichen and Wu, Keming and Yang, Zuhao and Li, Bo and Hu, Kairui and Wang, Bin and Li, Xingxuan and Bing, Lidong},
  title     = {{OpenMMReasoner}: Pushing the Frontiers in Multimodal Reasoning with an Open and General Recipe},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19276--19286}
}
Dynamic Visual SLAM using a General 3D Prior: Xingguang Zhong,

Liren Jin,

Marija Popovic,

Jens Behley,

Cyrill Stachniss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Xingguang and Jin, Liren and Popovic, Marija and Behley, Jens and Stachniss, Cyrill},
  title     = {Dynamic Visual {SLAM} using a General {3D} Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21805--21815}
}
W2W: Language-Model-Based Trajectory Prediction with Reinforcement Learning: Zirui Xu,

Biao Yang,

Rongrong Ni,

Zhongkai Zhou,

Shaobo Shen; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zirui and Yang, Biao and Ni, Rongrong and Zhou, Zhongkai and Shen, Shaobo},
  title     = {{W2W}: Language-Model-Based Trajectory Prediction with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23538--23548}
}
RAID: Retrieval-Augmented Anomaly Detection: Mingxiu Cai,

Zhe Zhang,

Gaochang Wu,

Tianyou Chai,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Mingxiu and Zhang, Zhe and Wu, Gaochang and Chai, Tianyou and Zhu, Xiatian},
  title     = {{RAID}: Retrieval-Augmented Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21367--21378}
}
ShowTable: Unlocking Creative Table Visualization with Collaborative Reflection and Refinement: Zhihang Liu,

Xiaoyi Bao,

Pandeng Li,

Junjie Zhou,

Zhaohe Liao,

Yefei He,

Kaixun Jiang,

Chen-Wei Xie,

Yun Zheng,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhihang and Bao, Xiaoyi and Li, Pandeng and Zhou, Junjie and Liao, Zhaohe and He, Yefei and Jiang, Kaixun and Xie, Chen-Wei and Zheng, Yun and Xie, Hongtao},
  title     = {{ShowTable}: Unlocking Creative Table Visualization with Collaborative Reflection and Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24405--24416}
}
Hybrid Agents for Image Restoration: Bingchen Li,

Xin Li,

Yiting Lu,

Zhibo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingchen and Li, Xin and Lu, Yiting and Chen, Zhibo},
  title     = {Hybrid Agents for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22636--22647}
}
Same or Not? Enhancing Visual Perception in Vision-Language Models: Damiano Marsili,

Aditya Mehta,

Ryan Y. Lin,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marsili_2026_CVPR,
  author    = {Marsili, Damiano and Mehta, Aditya and Lin, Ryan Y. and Gkioxari, Georgia},
  title     = {Same or Not? Enhancing Visual Perception in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17303--17315}
}
Turbo-GS: Accelerating 3D Gaussian Fitting for High-Resolution Radiance Fields: Ankit Dhiman,

Tao Lu,

R Srinath,

Emre Arslan,

Angela Xing,

Yuanbo Xiangli,

Venkatesh Babu Radhakrishnan,

Srinath Sridhar; [pdf] [supp]
[bibtex]
@InProceedings{Dhiman_2026_CVPR,
  author    = {Dhiman, Ankit and Lu, Tao and Srinath, R and Arslan, Emre and Xing, Angela and Xiangli, Yuanbo and Radhakrishnan, Venkatesh Babu and Sridhar, Srinath},
  title     = {{Turbo-GS}: Accelerating {3D} {Gaussian} Fitting for High-Resolution Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15454--15464}
}
EmbodMocap: In-the-Wild 4D Human-Scene Reconstruction for Embodied Agents: Wenjia Wang,

Liang Pan,

Huaijin Pi,

Yuke Lou,

Xuqian Ren,

Yifan Wu,

Zhouyingcheng Liao,

Lei Yang,

Rishabh Dabral,

Christian Theobalt,

Taku Komura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Wenjia and Pan, Liang and Pi, Huaijin and Lou, Yuke and Ren, Xuqian and Wu, Yifan and Liao, Zhouyingcheng and Yang, Lei and Dabral, Rishabh and Theobalt, Christian and Komura, Taku},
  title     = {{EmbodMocap}: In-the-Wild {4D} Human-Scene Reconstruction for Embodied Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28424--28434}
}
Reconstruction-Guided Slot Curriculum: Addressing Object Over-Fragmentation in Video Object-Centric Learning: WonJun Moon,

Hyun Seok Seong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil},
  title     = {Reconstruction-Guided Slot Curriculum: Addressing Object Over-Fragmentation in Video Object-Centric Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25001--25010}
}
Virtual Full-stack Scanning of Brain MRI via Imputing Any Quantised Code: Yicheng Wu,

Tao Song,

Zhonghua Wu,

Jin Ye,

Zongyuan Ge,

Wenjia Bai,

Zhaolin Chen,

Jianfei Cai; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yicheng and Song, Tao and Wu, Zhonghua and Ye, Jin and Ge, Zongyuan and Bai, Wenjia and Chen, Zhaolin and Cai, Jianfei},
  title     = {Virtual Full-stack Scanning of Brain {MRI} via Imputing Any Quantised Code},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21026--21035}
}
MoD-DPO: Towards Mitigating Cross-modal Hallucinations in Omni LLMs using Modality Decoupled Preference Optimization: Ashutosh Chaubey,

Jiacheng Pang,

Mohammad Soleymani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaubey_2026_CVPR,
  author    = {Chaubey, Ashutosh and Pang, Jiacheng and Soleymani, Mohammad},
  title     = {{MoD-DPO}: Towards Mitigating Cross-modal Hallucinations in Omni {LLMs} using Modality Decoupled Preference Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18284--18294}
}
PerpetualWonder: Long-horizon Action-conditioned 4D Scene Generation: Jiahao Zhan,

Zizhang Li,

Hong-Xing Yu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jiahao and Li, Zizhang and Yu, Hong-Xing and Wu, Jiajun},
  title     = {{PerpetualWonder}: Long-horizon Action-conditioned {4D} Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25370--25380}
}
SIF: Semantically In-Distribution Fingerprints for Large Vision-Language Models: Yifei Zhao,

Qian Lou,

Mengxin Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yifei and Lou, Qian and Zheng, Mengxin},
  title     = {{SIF}: Semantically In-Distribution Fingerprints for Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17399--17408}
}
ReFTA: Breaking the Weight Reconstruction Bottleneck in Tensorized Parameter-Efficient Fine-Tuning: Jingjing Zheng,

Anda Tang,

Qiangqiang Mao,

Zhouchen Lin,

Yankai Cao; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jingjing and Tang, Anda and Mao, Qiangqiang and Lin, Zhouchen and Cao, Yankai},
  title     = {{ReFTA}: Breaking the Weight Reconstruction Bottleneck in Tensorized Parameter-Efficient Fine-Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26369--26378}
}
From Exploration to Exploitation: A Two-Stage Entropy RLVR Approach for Noise-Tolerant MLLM Training: Donglai Xu,

Hongzheng Yang,

Yuzhi Zhao,

Pingping Zhang,

Jinpeng Chen,

Wenao Ma,

Zhijian Hou,

Mengyang Wu,

Xiaolei Li,

Senkang Hu,

Ziyi Guan,

Jason Chun Lok Li,

Lai-Man Po; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Donglai and Yang, Hongzheng and Zhao, Yuzhi and Zhang, Pingping and Chen, Jinpeng and Ma, Wenao and Hou, Zhijian and Wu, Mengyang and Li, Xiaolei and Hu, Senkang and Guan, Ziyi and Li, Jason Chun Lok and Po, Lai-Man},
  title     = {From Exploration to Exploitation: A Two-Stage Entropy {RLVR} Approach for Noise-Tolerant {MLLM} Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17776--17786}
}
Dual-Level Hypergraph Generation for Addressing Feature Scarcity in Whole-Slide Image Classification: Shuilian Yao,

Qi Jia,

Yu Liu,

Pengshuo Zhang,

Lili Sun,

Weimin Wang,

Yanmei Zhu,

Bo Zhang,

Xin Fan; [pdf]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Shuilian and Jia, Qi and Liu, Yu and Zhang, Pengshuo and Sun, Lili and Wang, Weimin and Zhu, Yanmei and Zhang, Bo and Fan, Xin},
  title     = {Dual-Level Hypergraph Generation for Addressing Feature Scarcity in Whole-Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28328--28337}
}
LAM: Language Articulated Object Modelers: Yipeng Gao,

Yunhao Ge,

Peilin Cai,

Daniel Seita,

Laurent Itti; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yipeng and Ge, Yunhao and Cai, Peilin and Seita, Daniel and Itti, Laurent},
  title     = {{LAM}: Language Articulated Object Modelers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16010--16020}
}
TAPE: Task-Adaptive Prototype Evolution in Audio-Language Models for Fully Few-shot Class-incremental Audio Classification: Yunlong Gao,

Wenxin Liang,

Guanglu Wang,

Senqi Guan,

Linlin Zong,

Dongyu Zhang,

Xinyue Liu; [pdf]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yunlong and Liang, Wenxin and Wang, Guanglu and Guan, Senqi and Zong, Linlin and Zhang, Dongyu and Liu, Xinyue},
  title     = {{TAPE}: Task-Adaptive Prototype Evolution in Audio-Language Models for Fully Few-shot Class-incremental Audio Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19570--19579}
}
IAG: Input-aware Backdoor Attack on VLM-based Visual Grounding: Junxian Li,

Beining Xu,

Simin Chen,

Jiatong Li,

Jingdi Lei,

Haodong Zhao,

Di Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junxian and Xu, Beining and Chen, Simin and Li, Jiatong and Lei, Jingdi and Zhao, Haodong and Zhang, Di},
  title     = {{IAG}: Input-aware Backdoor Attack on {VLM}-based Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27872--27883}
}
AceTone: Bridging Words and Colors for Conditional Image Grading: Tianren Ma,

Mingxiang Liao,

Xijin Zhang,

Qixiang Ye; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tianren and Liao, Mingxiang and Zhang, Xijin and Ye, Qixiang},
  title     = {{AceTone}: Bridging Words and Colors for Conditional Image Grading},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25851--25860}
}
Homaloidal parametrization for detecting critical two-view configurations: Rakshith Madhavan,

Matteo Forlivesi,

Marina Bertolini,

Cristina Turrini,

Federica Arrigoni,

Luca Magri; [pdf] [supp]
[bibtex]
@InProceedings{Madhavan_2026_CVPR,
  author    = {Madhavan, Rakshith and Forlivesi, Matteo and Bertolini, Marina and Turrini, Cristina and Arrigoni, Federica and Magri, Luca},
  title     = {Homaloidal parametrization for detecting critical two-view configurations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26432--26440}
}
Generate, Analyze, and Refine: Training-Free Sound Source Localization via MLLM Meta-Reasoning: Subin Park,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Subin and Kim, Jung Uk},
  title     = {Generate, Analyze, and Refine: Training-Free Sound Source Localization via {MLLM} Meta-Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15752--15761}
}
Detect Anything via Next Point Prediction: Qing Jiang,

Junan Huo,

Xingyu Chen,

Yuda Xiong,

Zhaoyang Zeng,

Yihao Chen,

Tianhe Ren,

Junzhi Yu,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Qing and Huo, Junan and Chen, Xingyu and Xiong, Yuda and Zeng, Zhaoyang and Chen, Yihao and Ren, Tianhe and Yu, Junzhi and Zhang, Lei},
  title     = {Detect Anything via Next Point Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25472--25483}
}
VAST: Video Ability-Stratified Taxonomy for Data-Efficient Video Reasoning: Zhongan Wang,

Xiaoyu Wen,

Lingxiao Du,

Kun Li,

Zhiliang Wu,

Xingcheng Xu,

Qiaosheng Zhang,

Chaochao Lu,

Hehe Fan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhongan and Wen, Xiaoyu and Du, Lingxiao and Li, Kun and Wu, Zhiliang and Xu, Xingcheng and Zhang, Qiaosheng and Lu, Chaochao and Fan, Hehe},
  title     = {{VAST}: Video Ability-Stratified Taxonomy for Data-Efficient Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18576--18586}
}
Harmonious Parameter Adaptation in Continual Visual Instruction Tuning for Safety-Aligned MLLMs: Ziqi Wang,

Chang Che,

Qi Wang,

Hui Ma,

Zenglin Shi,

Cees G. M. Snoek,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziqi and Che, Chang and Wang, Qi and Ma, Hui and Shi, Zenglin and Snoek, Cees G. M. and Wang, Meng},
  title     = {Harmonious Parameter Adaptation in Continual Visual Instruction Tuning for Safety-Aligned {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17282--17291}
}
From Pairs to Sequences: Track-Aware Policy Gradients for Keypoint Detection: Yepeng Liu,

Hao Li,

Liwen Yang,

Fangzhen Li,

Xudi Ge,

Yuliang Gu,

Kuang Gao,

Bing Wang,

Guang Chen,

Hangjun Ye,

Yongchao Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yepeng and Li, Hao and Yang, Liwen and Li, Fangzhen and Ge, Xudi and Gu, Yuliang and Gao, Kuang and Wang, Bing and Chen, Guang and Ye, Hangjun and Xu, Yongchao},
  title     = {From Pairs to Sequences: Track-Aware Policy Gradients for Keypoint Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21453--21463}
}
Do Less, Achieve More: Do We Need Every-Step Optimization for RL Fine-tuning of Diffusion Models?: Renye Yan,

Jikang Cheng,

Shikun Sun,

Yi Sun,

You Wu,

Wei Peng,

Zongwei Wang,

Ling Liang,

Junliang Xing,

Yimao Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Renye and Cheng, Jikang and Sun, Shikun and Sun, Yi and Wu, You and Peng, Wei and Wang, Zongwei and Liang, Ling and Xing, Junliang and Cai, Yimao},
  title     = {Do Less, Achieve More: Do We Need Every-Step Optimization for {RL} Fine-tuning of Diffusion Models?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16561--16571}
}
Synthetic Curriculum Reinforces Compositional Text-to-Image Generation: Shijian Wang,

Runhao Fu,

Siyi Zhao,

Qingqin Zhan,

Xingjian Wang,

Jiarui Jin,

Yuan Lu,

Hanqian Wu,

Cunjian Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shijian and Fu, Runhao and Zhao, Siyi and Zhan, Qingqin and Wang, Xingjian and Jin, Jiarui and Lu, Yuan and Wu, Hanqian and Chen, Cunjian},
  title     = {Synthetic Curriculum Reinforces Compositional Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21953--21963}
}
Opti-NeuS: Neural Reconstruction for Dual-Layered Transparent and Opaque Objects: Yi Yang,

Gaoyang Zhang,

Jun Tan,

Xinguo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Zhang, Gaoyang and Tan, Jun and Liu, Xinguo},
  title     = {{Opti-NeuS}: Neural Reconstruction for Dual-Layered Transparent and Opaque Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22594--22603}
}
Drive My Way: Preference Alignment of Vision-Language-Action Model for Personalized Driving: Zehao Wang,

Huaide Jiang,

Shuaiwu Dong,

Yuping Wang,

Hang Qiu,

Jiachen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zehao and Jiang, Huaide and Dong, Shuaiwu and Wang, Yuping and Qiu, Hang and Li, Jiachen},
  title     = {Drive My Way: Preference Alignment of Vision-Language-Action Model for Personalized Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25204--25214}
}
EventGait: Towards Robust Gait Recognition with Event Streams: Senyan Xu,

Shuai Chen,

Chuanfu Shen,

Kean Liu,

Zhijing Sun,

Chengzhi Cao,

Xueyang Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Senyan and Chen, Shuai and Shen, Chuanfu and Liu, Kean and Sun, Zhijing and Cao, Chengzhi and Fu, Xueyang},
  title     = {{EventGait}: Towards Robust Gait Recognition with Event Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22323--22334}
}
Rethinking SNN Online Training and Deployment: Gradient-Coherent Learning via Hybrid-Driven LIF Model: Zecheng Hao,

Yifan Huang,

Zijie Xu,

Wenxuan Liu,

Yuanhong Tang,

Zhaofei Yu,

Tiejun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Zecheng and Huang, Yifan and Xu, Zijie and Liu, Wenxuan and Tang, Yuanhong and Yu, Zhaofei and Huang, Tiejun},
  title     = {Rethinking {SNN} Online Training and Deployment: Gradient-Coherent Learning via Hybrid-Driven {LIF} Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20202--20211}
}
MMTIT-Bench: A Multilingual and Multi-Scenario Benchmark with Cognition-Perception-Reasoning Guided Text-Image Machine Translation: Gengluo Li,

Chengquan Zhang,

Yupu Liang,

Huawen Shen,

Yaping Zhang,

Pengyuan Lyu,

Weinong Wang,

Xingyu Wan,

Gangyan Zeng,

Han Hu,

Can Ma,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Gengluo and Zhang, Chengquan and Liang, Yupu and Shen, Huawen and Zhang, Yaping and Lyu, Pengyuan and Wang, Weinong and Wan, Xingyu and Zeng, Gangyan and Hu, Han and Ma, Can and Zhou, Yu},
  title     = {{MMTIT-Bench}: A Multilingual and Multi-Scenario Benchmark with Cognition-Perception-Reasoning Guided Text-Image Machine Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16593--16602}
}
Interact2Ar: Full-Body Human-Human Interaction Generation via Autoregressive Diffusion Models: Pablo Ruiz-Ponce,

Sergio Escalera,

José García-Rodríguez,

Jiankang Deng,

Rolandos Alexandros Potamias; [pdf] [supp]
[bibtex]
@InProceedings{Ruiz-Ponce_2026_CVPR,
  author    = {Ruiz-Ponce, Pablo and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos{\'e} and Deng, Jiankang and Potamias, Rolandos Alexandros},
  title     = {{Interact2Ar}: Full-Body Human-Human Interaction Generation via Autoregressive Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23559--23569}
}
A Supervised Multi-task Framework for Joint cryo-ET Restoration Enabled by Generative Physical Simulation: Xinsheng Wang,

Zhidong Yang,

Xiaohua Wan,

Renmin Han,

Shuai Tang,

Hao Dong,

Fa Zhang,

Bin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinsheng and Yang, Zhidong and Wan, Xiaohua and Han, Renmin and Tang, Shuai and Dong, Hao and Zhang, Fa and Hu, Bin},
  title     = {A Supervised Multi-task Framework for Joint {cryo-ET} Restoration Enabled by Generative Physical Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21057--21066}
}
AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects: Danrui Li,

Jiahao Zhang,

Bernhard Egger,

Moitreya Chatterjee,

Suhas Lohit,

Tim K. Marks,

Anoop Cherian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
  title     = {{AssemblyBench}: Physics-Aware Assembly of Complex Industrial Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17326--17335}
}
SPAR: Single-Pass Any-Resolution ViT for Open-vocabulary Segmentation: Naomi Kombol,

Ivan Martinović,

Siniša Šegvić,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Kombol_2026_CVPR,
  author    = {Kombol, Naomi and Martinovi{\'c}, Ivan and {\v{S}}egvi{\'c}, Sini{\v{s}}a and Tolias, Giorgos},
  title     = {{SPAR}: Single-Pass Any-Resolution {ViT} for Open-vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27579--27589}
}
PyramidalWan: On Making Pretrained Video Model Pyramidal for Efficient Inference: Denis Korzhenkov,

Adil Karjauv,

Animesh Karnewar,

Mohsen Ghafoorian,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Korzhenkov_2026_CVPR,
  author    = {Korzhenkov, Denis and Karjauv, Adil and Karnewar, Animesh and Ghafoorian, Mohsen and Habibian, Amirhossein},
  title     = {{PyramidalWan}: On Making Pretrained Video Model Pyramidal for Efficient Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16153--16162}
}
Nestwork: Conditional 3D Furnished House Layout Generation through Latent Heterogeneous Graph Diffusion: Shuhan Miao,

Biru Cao,

Junling Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{Miao_2026_CVPR,
  author    = {Miao, Shuhan and Cao, Biru and Zhuang, Junling},
  title     = {Nestwork: Conditional {3D} Furnished House Layout Generation through Latent Heterogeneous Graph Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27093--27103}
}
Lafite: A Generative Latent Field for 3D Native Texturing: Chia-Hao Chen,

Yuan-Chen Guo,

Zi-Xin Zou,

Ze Yuan,

Guan Luo,

Xiaojuan Qi,

Ding Liang,

Yan-Pei Cao,

Song-Hai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chia-Hao and Guo, Yuan-Chen and Zou, Zi-Xin and Yuan, Ze and Luo, Guan and Qi, Xiaojuan and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai},
  title     = {Lafite: A Generative Latent Field for {3D} Native Texturing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19960--19971}
}
Obstruction Reasoning for Robotic Grasping: Runyu Jiao,

Matteo Bortolon,

Francesco Giuliari,

Alice Fasoli,

Sergio Povoli,

Guofeng Mei,

Yiming Wang,

Fabio Poiesi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Runyu and Bortolon, Matteo and Giuliari, Francesco and Fasoli, Alice and Povoli, Sergio and Mei, Guofeng and Wang, Yiming and Poiesi, Fabio},
  title     = {Obstruction Reasoning for Robotic Grasping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20755--20764}
}
Beyond Multiple Choice: Verifiable OpenQA for Robust Vision-Language RFT: Yesheng Liu,

Hao Li,

Haiyu Xu,

Baoqi Pei,

Jiahao Wang,

Mingxuan Zhao,

Jing-Shu Zheng,

Zheqi He,

JG Yao,

Xi Yang,

Bowen Qin,

Jiajun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yesheng and Li, Hao and Xu, Haiyu and Pei, Baoqi and Wang, Jiahao and Zhao, Mingxuan and Zheng, Jing-Shu and He, Zheqi and Yao, JG and Yang, Xi and Qin, Bowen and Zhang, Jiajun},
  title     = {Beyond Multiple Choice: Verifiable {OpenQA} for Robust Vision-Language {RFT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18859--18869}
}
PartDiffuser: Part-wise 3D Mesh Generation via Discrete Diffusion: Yichen Yang,

Hong Li,

Haodong Zhu,

Linin Yang,

Guojun Lei,

Sheng Xu,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yichen and Li, Hong and Zhu, Haodong and Yang, Linin and Lei, Guojun and Xu, Sheng and Zhang, Baochang},
  title     = {{PartDiffuser}: Part-wise {3D} Mesh Generation via Discrete Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19940--19949}
}
Plenoptic Video Generation: Xiao Fu,

Shitao Tang,

Min Shi,

Xian Liu,

Jinwei Gu,

Ming-Yu Liu,

Dahua Lin,

Chen-Hsuan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Xiao and Tang, Shitao and Shi, Min and Liu, Xian and Gu, Jinwei and Liu, Ming-Yu and Lin, Dahua and Lin, Chen-Hsuan},
  title     = {Plenoptic Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16142--16152},
}
Seeing through Light and Darkness: Sensor-Physics Grounded Deblurring HDR NeRF from Single-Exposure Images and Events: Yunshan Qi,

Lin Zhu,

Nan Bao,

Yifan Zhao,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Yunshan and Zhu, Lin and Bao, Nan and Zhao, Yifan and Li, Jia},
  title     = {Seeing through Light and Darkness: Sensor-Physics Grounded Deblurring {HDR} {NeRF} from Single-Exposure Images and Events},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22723--22732},
}
AutoRegressive Generation with B-rep Holistic Token Sequence Representation: Jiahao Li,

Yunpeng Bai,

Yongkang Dai,

Hao Guo,

Hongping Gan,

Yilei Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiahao and Bai, Yunpeng and Dai, Yongkang and Guo, Hao and Gan, Hongping and Shi, Yilei},
  title     = {{AutoRegressive} Generation with {B-rep} Holistic Token Sequence Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24363--24372},
}
Is your VLM Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for UAV Navigation: Lingfeng Zhang,

Yuchen Zhang,

Hongsheng Li,

Haoxiang Fu,

Yingbo Tang,

Hangjun Ye,

Long Chen,

Xiaojun Liang,

Xiaoshuai Hao,

Wenbo Ding; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Lingfeng and Zhang, Yuchen and Li, Hongsheng and Fu, Haoxiang and Tang, Yingbo and Ye, Hangjun and Chen, Long and Liang, Xiaojun and Hao, Xiaoshuai and Ding, Wenbo},
  title     = {Is your {VLM} Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for {UAV} Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25904--25913},
}
Towards Multimodal Domain Generalization with Few Labels: Hongzhao Li,

Hao Dong,

Hualei Wan,

Shupan Li,

Mingliang Xu,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongzhao and Dong, Hao and Wan, Hualei and Li, Shupan and Xu, Mingliang and Khan, Muhammad Haris},
  title     = {Towards Multimodal Domain Generalization with Few Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15070--15079},
}
Reclaiming Lost Text Layers for Source-Free Cross-Domain Few-Shot Learning: Zhenyu Zhang,

Guangyao Chen,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhenyu and Chen, Guangyao and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {Reclaiming Lost Text Layers for Source-Free Cross-Domain Few-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15080--15090},
}
DTG-Restore: Training-Free Diffusion Refinement for Generative Video Super-Resolution: Hidir Yesiltepe,

Koutilya PNVR,

Gaurav Pathak,

Navaneeth Bodla,

Bharat Singh,

Pinar Yanardag,

Jinrong Xie; [pdf] [supp]
[bibtex]
@InProceedings{Yesiltepe_2026_CVPR,
  author    = {Yesiltepe, Hidir and PNVR, Koutilya and Pathak, Gaurav and Bodla, Navaneeth and Singh, Bharat and Yanardag, Pinar and Xie, Jinrong},
  title     = {{DTG-Restore}: Training-Free Diffusion Refinement for Generative Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23335--23344},
}
Enhancing Spatial Understanding in Image Generation via Reward Modeling: Zhenyu Tang,

Chaoran Feng,

Yufan Deng,

Jie Wu,

Xiaojie Li,

Rui Wang,

Yunpeng Chen,

Daquan Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhenyu and Feng, Chaoran and Deng, Yufan and Wu, Jie and Li, Xiaojie and Wang, Rui and Chen, Yunpeng and Zhou, Daquan},
  title     = {Enhancing Spatial Understanding in Image Generation via Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27249--27259},
}
Towards Efficient Medical Reasoning with Minimal Fine-Tuning Data: Xinlin Zhuang,

Feilong Tang,

Haolin Yang,

Xiwei Liu,

Ming Hu,

Huifa Li,

Haochen Xue,

Junjun He,

Zongyuan Ge,

Yichen Li,

Ying Qian,

Imran Razzak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xinlin and Tang, Feilong and Yang, Haolin and Liu, Xiwei and Hu, Ming and Li, Huifa and Xue, Haochen and He, Junjun and Ge, Zongyuan and Li, Yichen and Qian, Ying and Razzak, Imran},
  title     = {Towards Efficient Medical Reasoning with Minimal Fine-Tuning Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20223--20232},
}
The SA-FARI Dataset: Segment Anything in Footage of Animals for Recognition and Identification: Dante Wasmuht,

Otto Brookes,

Maximilian Schall,

Pablo Palencia,

Christopher Beirne,

Tilo Burghardt,

Majid Mirmehdi,

Hjalmar Kühl,

Mimi Arandjelovic,

Sam Pottie,

Peter Bermant,

Brandon Asheim,

Yi Jin Toh,

Adam Elzinga,

Jason Allan Holmberg,

Andrew Whitworth,

Eleanor Flatt,

Laura Gustafson,

Chaitanya Ryali,

Yuan-Ting Hu,

Baishan Guo,

Andrew Westbury,

Kate Saenko,

Didac Suris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wasmuht_2026_CVPR,
  author    = {Wasmuht, Dante and Brookes, Otto and Schall, Maximilian and Palencia, Pablo and Beirne, Christopher and Burghardt, Tilo and Mirmehdi, Majid and K{\"u}hl, Hjalmar and Arandjelovic, Mimi and Pottie, Sam and Bermant, Peter and Asheim, Brandon and Toh, Yi Jin and Elzinga, Adam and Holmberg, Jason Allan and Whitworth, Andrew and Flatt, Eleanor and Gustafson, Laura and Ryali, Chaitanya and Hu, Yuan-Ting and Guo, Baishan and Westbury, Andrew and Saenko, Kate and Suris, Didac},
  title     = {The {SA-FARI} Dataset: Segment Anything in Footage of Animals for Recognition and Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21679--21689},
}
Perceptual 3D Simulation With Physical World Modeling: Wanhee Lee,

Klemen Kotar,

Rahul Mysore Venkatesh,

Jared Watrous,

Daniel LK Yamins; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Wanhee and Kotar, Klemen and Venkatesh, Rahul Mysore and Watrous, Jared and Yamins, Daniel LK},
  title     = {Perceptual {3D} Simulation With Physical World Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27219--27228},
}
PMRNet: Physics-informed Multi-scale Refinement Network for Medical Image Segmentation: Boce Kang; [pdf]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Boce},
  title     = {{PMRNet}: Physics-informed Multi-scale Refinement Network for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15659--15668},
}
FlexAvatar: Flexible Large Reconstruction Model for Animatable Gaussian Head Avatars with Detailed Deformation: Cheng Peng,

Zhuo Su,

Liao Wang,

Chen Guo,

Zhaohu Li,

Chengjiang Long,

Zheng Lv,

Jingxiang Sun,

Chenyangguang Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Cheng and Su, Zhuo and Wang, Liao and Guo, Chen and Li, Zhaohu and Long, Chengjiang and Lv, Zheng and Sun, Jingxiang and Zhang, Chenyangguang and Liu, Yebin},
  title     = {{FlexAvatar}: Flexible Large Reconstruction Model for Animatable {Gaussian} Head Avatars with Detailed Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18229--18240},
}
INSID3: Training-Free In-Context Segmentation with DINOv3: Claudia Cuttano,

Gabriele Trivigno,

Christoph Reich,

Daniel Cremers,

Carlo Masone,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cuttano_2026_CVPR,
  author    = {Cuttano, Claudia and Trivigno, Gabriele and Reich, Christoph and Cremers, Daniel and Masone, Carlo and Roth, Stefan},
  title     = {{INSID3}: Training-Free In-Context Segmentation with {DINOv3}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21638--21648},
}
Towards Realistic and Consistent Orbital Video Generation via 3D Foundation Priors: Rong Wang,

Ruyi Zha,

Ziang Cheng,

Jiayu Yang,

Pulak Purkait,

Hongdong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Rong and Zha, Ruyi and Cheng, Ziang and Yang, Jiayu and Purkait, Pulak and Li, Hongdong},
  title     = {Towards Realistic and Consistent Orbital Video Generation via {3D} Foundation Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18407--18417},
}
Relightful Video Portrait Harmonization: Jun Myeong Choi,

Jae Shin Yoon,

Luchao Qi,

Roni Sengupta,

Joon-Young Lee; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jun Myeong and Yoon, Jae Shin and Qi, Luchao and Sengupta, Roni and Lee, Joon-Young},
  title     = {Relightful Video Portrait Harmonization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23356--23366},
}
GEM: Generating LiDAR World Model via Deformable Mamba: Yang Wu,

Zhaojiang Liu,

Qiang Meng,

Youquan Liu,

Renliang Weng,

Jianjun Qian,

Jian Yang,

Jin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yang and Liu, Zhaojiang and Meng, Qiang and Liu, Youquan and Weng, Renliang and Qian, Jianjun and Yang, Jian and Xie, Jin},
  title     = {{GEM}: Generating {LiDAR} World Model via Deformable {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24227--24236},
}
Featurising Pixels from Dynamic 3D Scenes with Linear In-Context Learners: Nikita Araslanov,

Martin Sundermeyer,

Hidenobu Matsuki,

David Joseph Tan,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Araslanov_2026_CVPR,
  author    = {Araslanov, Nikita and Sundermeyer, Martin and Matsuki, Hidenobu and Tan, David Joseph and Tombari, Federico},
  title     = {Featurising Pixels from Dynamic {3D} Scenes with Linear In-Context Learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21443--21452},
}
CRIT: Graph-Based Automatic Data Synthesis to Enhance Cross-Modal Multi-Hop Reasoning: Junyoung Sung,

Seungwoo Lyu,

Minjun Kim,

Sumin An,

Arsha Nagrani,

Paul Hongsuck Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sung_2026_CVPR,
  author    = {Sung, Junyoung and Lyu, Seungwoo and Kim, Minjun and An, Sumin and Nagrani, Arsha and Seo, Paul Hongsuck},
  title     = {{CRIT}: Graph-Based Automatic Data Synthesis to Enhance Cross-Modal Multi-Hop Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19143--19154},
}
RNN as Linear Transformer: A Closer Investigation into Representational Potentials of Visual Mamba Models: Timing Yang,

Feng Wang,

Guoyizhe Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Timing and Wang, Feng and Wei, Guoyizhe},
  title     = {{RNN} as Linear Transformer: A Closer Investigation into Representational Potentials of Visual {Mamba} Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27398--27408},
}
AMB3R: Accurate Feed-forward Metric-scale 3D Reconstruction with Backend: Hengyi Wang,

Lourdes Agapito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hengyi and Agapito, Lourdes},
  title     = {{AMB3R}: Accurate Feed-forward Metric-scale {3D} Reconstruction with Backend},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14612--14625},
}
PatchScene: Patch-based Voxel Diffusion Model for Large-Scale Scene Completion: Qingdong Xu,

Jiajun Zhu,

Shilin Zhu,

Xinjing He,

Chao Lu,

Huanran Wang,

Jiyao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qingdong and Zhu, Jiajun and Zhu, Shilin and He, Xinjing and Lu, Chao and Wang, Huanran and Zhang, Jiyao},
  title     = {{PatchScene}: Patch-based Voxel Diffusion Model for Large-Scale Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16499--16508},
}
MM-ReCoder: Advancing Chart-to-Code Generation with Reinforcement Learning and Self-Correction: Zitian Tang,

Xu Zhang,

Jianbo Yuan,

Yang Zou,

Varad Gunjal,

Songyao Jiang,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zitian and Zhang, Xu and Yuan, Jianbo and Zou, Yang and Gunjal, Varad and Jiang, Songyao and Modolo, Davide},
  title     = {{MM-ReCoder}: Advancing Chart-to-Code Generation with Reinforcement Learning and Self-Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22164--22173},
}
HATS: Hardness-Aware Trajectory Synthesis for GUI Agents: Rui Shao,

Ruize Gao,

Bin Xie,

Yixing Li,

Kaiwen Zhou,

Shuai Wang,

Weili Guan,

Gongwei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Rui and Gao, Ruize and Xie, Bin and Li, Yixing and Zhou, Kaiwen and Wang, Shuai and Guan, Weili and Chen, Gongwei},
  title     = {{HATS}: Hardness-Aware Trajectory Synthesis for {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27471--27481},
}
SwiftTailor: Efficient 3D Garment Generation with Geometry Image Representation: Phuc Pham,

Uy Dieu Tran,

Binh-Son Hua,

Phong Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2026_CVPR,
  author    = {Pham, Phuc and Tran, Uy Dieu and Hua, Binh-Son and Nguyen, Phong},
  title     = {{SwiftTailor}: Efficient {3D} Garment Generation with Geometry Image Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27169--27178},
}
BiFM: Bidirectional Flow Matching for Few-Step Image Editing and Generation: Yasong Dai,

Zeeshan Hayder,

David Ahmedt-Aristizabal,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Yasong and Hayder, Zeeshan and Ahmedt-Aristizabal, David and Li, Hongdong},
  title     = {{BiFM}: Bidirectional Flow Matching for Few-Step Image Editing and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23325--23334},
}
Vector Prism: Animating Vector Graphics by Stratifying Semantic Structure: Jooyeol Yun,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2026_CVPR,
  author    = {Yun, Jooyeol and Choo, Jaegul},
  title     = {{Vector Prism}: Animating Vector Graphics by Stratifying Semantic Structure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17316--17325},
}
CIGPose: Causal Intervention Graph Neural Network for Whole-Body Pose Estimation: Bohao Li,

Zhicheng Cao,

Huixian Li,

Yangming Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bohao and Cao, Zhicheng and Li, Huixian and Guo, Yangming},
  title     = {{CIGPose}: Causal Intervention Graph Neural Network for Whole-Body Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23484--23494},
}
Lite Any Stereo: Efficient Zero-Shot Stereo Matching: Junpeng Jing,

Weixun Luo,

Ye Mao,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
  title     = {{Lite Any Stereo}: Efficient Zero-Shot Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21725--21735},
}
Adaptive Bayesian Early-Exit Networks for Efficient Non-Transferable Learning: Siyu Luan,

Yan Li,

Zhong Chen,

Zhenyi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Luan_2026_CVPR,
  author    = {Luan, Siyu and Li, Yan and Chen, Zhong and Wang, Zhenyi},
  title     = {Adaptive {Bayesian} Early-Exit Networks for Efficient Non-Transferable Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24514--24523},
}
Beyond Objects: Contextual Synthetic Data Generation for Fine-Grained Classification: William Yang,

Xindi Wu,

Zhiwei Deng,

Esin Tureci,

Olga Russakovsky; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, William and Wu, Xindi and Deng, Zhiwei and Tureci, Esin and Russakovsky, Olga},
  title     = {Beyond Objects: Contextual Synthetic Data Generation for Fine-Grained Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22498--22508},
}
Lighting in Motion: Spatiotemporal HDR Lighting Estimation: Christophe Bolduc,

Julien Philip,

Li Ma,

Mingming He,

Paul Debevec,

Jean-François Lalonde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bolduc_2026_CVPR,
  author    = {Bolduc, Christophe and Philip, Julien and Ma, Li and He, Mingming and Debevec, Paul and Lalonde, Jean-Fran{\c{c}}ois},
  title     = {Lighting in Motion: Spatiotemporal {HDR} Lighting Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19696--19705},
}
Bootstrap Your Own AV-Proxies: Adaptive Contrastive and Prototype Learning for Audio-Visual Segmentation: Junbo Zhang,

Hang Su,

Zhaofan Li,

Hang Dong,

Chao Sun; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junbo and Su, Hang and Li, Zhaofan and Dong, Hang and Sun, Chao},
  title     = {Bootstrap Your Own {AV-Proxies}: Adaptive Contrastive and Prototype Learning for Audio-Visual Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23061--23071},
}
Self-Corrected Image Generation with Explainable Latent Rewards: Yinyi Luo,

Hrishikesh Gokhale,

Marios Savvides,

Jindong Wang,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yinyi and Gokhale, Hrishikesh and Savvides, Marios and Wang, Jindong and He, Shengfeng},
  title     = {Self-Corrected Image Generation with Explainable Latent Rewards},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20088--20097},
}
GIFSplat: Generative Prior-Guided Iterative Feed-Forward 3D Gaussian Splatting from Sparse Views: Tianyu Chen,

Wei Xiang,

Kang Han,

Yu Lu,

Di Wu,

Gaowen Liu,

Ramana Rao Kompella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tianyu and Xiang, Wei and Han, Kang and Lu, Yu and Wu, Di and Liu, Gaowen and Kompella, Ramana Rao},
  title     = {{GIFSplat}: Generative Prior-Guided Iterative Feed-Forward {3D} {Gaussian} Splatting from Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26031--26040},
}
VRCLIP: Multimodal Canonical Correlation Alignment for CLIP-Driven Vision-Radio Person Re-Identification: Rui Zhang,

Yaqi Wang,

Yadong Li,

Ruixu Geng,

Jianyang Wang,

Qijun Ying,

Dongheng Zhang,

Yang Hu,

Yan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Rui and Wang, Yaqi and Li, Yadong and Geng, Ruixu and Wang, Jianyang and Ying, Qijun and Zhang, Dongheng and Hu, Yang and Chen, Yan},
  title     = {{VRCLIP}: Multimodal Canonical Correlation Alignment for {CLIP-Driven} Vision-Radio Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25536--25546},
}
Hermite Radial Basis Function for Surface Reconstruction via Differentiable Rendering: Hugo Blanc,

Jean-Emmanuel Deschaud,

Alexis Paljic; [pdf] [supp]
[bibtex]
@InProceedings{Blanc_2026_CVPR,
  author    = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis},
  title     = {{Hermite} Radial Basis Function for Surface Reconstruction via Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15376--15386},
}
Learning 3D Representations for Spatial Intelligence from Unposed Multi-View Images: Bo Zhou,

Qiuxia Lai,

Zeren Sun,

Xiangbo Shu,

Yazhou Yao,

Wenguan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Bo and Lai, Qiuxia and Sun, Zeren and Shu, Xiangbo and Yao, Yazhou and Wang, Wenguan},
  title     = {Learning {3D} Representations for Spatial Intelligence from Unposed Multi-View Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22550--22560},
}
V-DPM: 4D Video Reconstruction with Dynamic Point Maps: Edgar Sucar,

Eldar Insafutdinov,

Zihang Lai,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sucar_2026_CVPR,
  author    = {Sucar, Edgar and Insafutdinov, Eldar and Lai, Zihang and Vedaldi, Andrea},
  title     = {{V-DPM}: {4D} Video Reconstruction with Dynamic Point Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14502--14511},
}
MMCP-GEN: A Modality-Extensible Diffusion Language Model for Conditional Protein Sequence Generation: Zeyu An,

Wanyu Lin,

Feng Tan,

Shujun Wang; [pdf] [supp]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Zeyu and Lin, Wanyu and Tan, Feng and Wang, Shujun},
  title     = {{MMCP-GEN}: A Modality-Extensible Diffusion Language Model for Conditional Protein Sequence Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15762--15772},
}
Hierarchical Process Reward Models are Symbolic Vision Learners: Shan Zhang,

Aotian Chen,

Kai Zou,

Jindong Gu,

Yuan Xue,

Anton van den Hengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shan and Chen, Aotian and Zou, Kai and Gu, Jindong and Xue, Yuan and van den Hengel, Anton},
  title     = {Hierarchical Process Reward Models are Symbolic Vision Learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22185--22194},
}
PPM-CLIP: Probabilistic Prompt Modeling for Generalizable AI-Generated Image Detection: Xinyuan Wang,

Yingxin Lai,

Zhiming Luo,

Zhihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinyuan and Lai, Yingxin and Luo, Zhiming and Liu, Zhihui},
  title     = {{PPM-CLIP}: Probabilistic Prompt Modeling for Generalizable {AI-Generated} Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21316--21325},
}
Beyond Weak Supervision: MLLMs-Guided Graded Knowledge Distillation for Unsupervised Camouflaged Object Detection: Huafeng Chen,

Chenguang Zhu,

Yueming Lyu,

Caifeng Shan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Huafeng and Zhu, Chenguang and Lyu, Yueming and Shan, Caifeng},
  title     = {Beyond Weak Supervision: {MLLMs-Guided} Graded Knowledge Distillation for Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27547--27557},
}
Annotation-Efficient Coreset Selection for Context-dependent Segmentation: Jin Zhang,

Zhe Cao,

Biwen Yang,

Ruiheng Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jin and Cao, Zhe and Yang, Biwen and Zhang, Ruiheng},
  title     = {Annotation-Efficient Coreset Selection for Context-dependent Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20410--20420},
}
DuoGen: Towards Autonomous Interleaved Multimodal Generation: Min Shi,

Xiaohui Zeng,

Jiannan Huang,

Yin Cui,

Francesco Ferroni,

Jialuo Li,

Zhaoshuo Li,

Yogesh Balaji,

Haoxiang Wang,

Tsung-Yi Lin,

Xiao Fu,

Yue Zhao,

Chieh-Yun Chen,

Ming-Yu Liu,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Min and Zeng, Xiaohui and Huang, Jiannan and Cui, Yin and Ferroni, Francesco and Li, Jialuo and Li, Zhaoshuo and Balaji, Yogesh and Wang, Haoxiang and Lin, Tsung-Yi and Fu, Xiao and Zhao, Yue and Chen, Chieh-Yun and Liu, Ming-Yu and Shi, Humphrey},
  title     = {{DuoGen}: Towards Autonomous Interleaved Multimodal Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21901--21911},
}
Feed-forward Gaussian Registration for Head Avatar Creation and Editing: Malte Prinzler,

Paulo Gotardo,

Siyu Tang,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Prinzler_2026_CVPR,
  author    = {Prinzler, Malte and Gotardo, Paulo and Tang, Siyu and Bolkart, Timo},
  title     = {Feed-forward {Gaussian} Registration for Head Avatar Creation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25270--25280},
}
Evidential Deep Partial Label Learning to Quantify Disambiguation Uncertainty: Jinfu Fan,

Jiangnan Li,

Xiaohui Zhong,

Kangrui Ren,

Zhencun Jiang,

Min Gan,

Tianhao Gu,

Linqing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Jinfu and Li, Jiangnan and Zhong, Xiaohui and Ren, Kangrui and Jiang, Zhencun and Gan, Min and Gu, Tianhao and Huang, Linqing},
  title     = {Evidential Deep Partial Label Learning to Quantify Disambiguation Uncertainty},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24770--24779},
}
REL-SF4PASS: Panoramic Semantic Segmentation with REL Depth Representation and Spherical Fusion: Xuewei Li,

Xinghan Bao,

Zhimin Chen,

Xi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuewei and Bao, Xinghan and Chen, Zhimin and Li, Xi},
  title     = {{REL-SF4PASS}: Panoramic Semantic Segmentation with {REL} Depth Representation and Spherical Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27676--27685},
}
BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models: Shengao Wang,

Wenqi Wang,

Zecheng Wang,

Max Whitton,

Michael Wakeham,

Arjun Chandra,

Joey Huang,

Pengyue Zhu,

Helen Chen,

David Li,

Jeffrey Li,

Shawn Li,

Andrew Zagula,

Amy Zhao,

Andrew Zhu,

Sayaka Nakamura,

Yuki Yamamoto,

Jerry Jun Yokono,

Aaron Mueller,

Bryan A. Plummer,

Kate Saenko,

Venkatesh Saligrama,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shengao and Wang, Wenqi and Wang, Zecheng and Whitton, Max and Wakeham, Michael and Chandra, Arjun and Huang, Joey and Zhu, Pengyue and Chen, Helen and Li, David and Li, Jeffrey and Li, Shawn and Zagula, Andrew and Zhao, Amy and Zhu, Andrew and Nakamura, Sayaka and Yamamoto, Yuki and Yokono, Jerry Jun and Mueller, Aaron and Plummer, Bryan A. and Saenko, Kate and Saligrama, Venkatesh and Gong, Boqing},
  title     = {{BabyVLM-V2}: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23696--23708},
}
DeDelayed: Deleting Remote Inference Delay via On-Device Correction: Dan Jacobellis,

Mateen Ulhaq,

Fabien Racapé,

Hyomin Choi,

Neeraja J. Yadwadkar; [pdf] [supp]
[bibtex]
@InProceedings{Jacobellis_2026_CVPR,
  author    = {Jacobellis, Dan and Ulhaq, Mateen and Racap{\'e}, Fabien and Choi, Hyomin and Yadwadkar, Neeraja J.},
  title     = {{DeDelayed}: Deleting Remote Inference Delay via On-Device Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19339--19348},
}
Scene Reconstruction as Mapping Priors for 3D Detection: Yang Fu,

Yuliang Zou,

Hao Xiang,

Xin Huang,

Yijing Bai,

Chen Song,

Weijing Shi,

Govind Thattai,

Dragomir Anguelov,

Mingxing Tan,

Yingwei Li; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yang and Zou, Yuliang and Xiang, Hao and Huang, Xin and Bai, Yijing and Song, Chen and Shi, Weijing and Thattai, Govind and Anguelov, Dragomir and Tan, Mingxing and Li, Yingwei},
  title     = {Scene Reconstruction as Mapping Priors for {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18734--18744},
}
AlignPose: Generalizable 6D Pose Estimation via Multi-view Feature-metric Alignment: Anna Šárová Mikeštíková,

Médéric Fourmy,

Martin Cifka,

Josef Sivic,

Vladimir Petrik; [pdf] [supp]
[bibtex]
@InProceedings{Mikestikova_2026_CVPR,
  author    = {Mike{\v{s}}t{\'\i}kov{\'a}, Anna {\v{S}}{\'a}rov{\'a} and Fourmy, M{\'e}d{\'e}ric and Cifka, Martin and Sivic, Josef and Petrik, Vladimir},
  title     = {{AlignPose}: Generalizable {6D} Pose Estimation via Multi-view Feature-metric Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14626--14636},
}
Towards Streaming Referring Video Segmentation via Large Language Model: Wenkang Zhang,

Kaicheng Yang,

Xiang An,

Qiang Li,

Ziyong Feng,

Wankou Yang,

Jiankang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenkang and Yang, Kaicheng and An, Xiang and Li, Qiang and Feng, Ziyong and Yang, Wankou and Deng, Jiankang},
  title     = {Towards Streaming Referring Video Segmentation via Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24598--24607},
}
NTK-Guided Implicit Neural Teaching: Chen Zhang,

Wei Zuo,

Bingyang Cheng,

Yikun Wang,

Wei-Bin Kou,

Yik-Chung Wu,

Ngai Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chen and Zuo, Wei and Cheng, Bingyang and Wang, Yikun and Kou, Wei-Bin and Wu, Yik-Chung and Wong, Ngai},
  title     = {{NTK}-Guided Implicit Neural Teaching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17248--17258},
}
AdaRadar: Rate Adaptive Spectral Compression for Radar-based Perception: Jinho Park,

Se Young Chun,

Mingoo Seok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jinho and Chun, Se Young and Seok, Mingoo},
  title     = {{AdaRadar}: Rate Adaptive Spectral Compression for Radar-based Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19349--19359},
}
Lighting-grounded Video Generation with Renderer-based Agent Reasoning: Ziqi Cai,

Taoyu Yang,

Zheng Chang,

Si Li,

Han Jiang,

Shuchen Weng,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Ziqi and Yang, Taoyu and Chang, Zheng and Li, Si and Jiang, Han and Weng, Shuchen and Shi, Boxin},
  title     = {Lighting-grounded Video Generation with Renderer-based Agent Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20055--20065},
}
RefAV: Towards Planning-Centric Scenario Mining: Cainan Davidson,

Deva Ramanan,

Neehar Peri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Davidson_2026_CVPR,
  author    = {Davidson, Cainan and Ramanan, Deva and Peri, Neehar},
  title     = {{RefAV}: Towards Planning-Centric Scenario Mining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21537--21548},
}
GeoCoT: Towards Reliable Remote Sensing Reasoning with Manifold Perspective: Daixun Li,

Zirui Li,

Sibo He,

Jiayun Tian,

Mingxiang Cao,

Weiying Xie,

Yunke Wang,

Xin Zhang,

Yusi Zhang,

Yunsong Li,

Chang Xu,

Leyuan Fang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Daixun and Li, Zirui and He, Sibo and Tian, Jiayun and Cao, Mingxiang and Xie, Weiying and Wang, Yunke and Zhang, Xin and Zhang, Yusi and Li, Yunsong and Xu, Chang and Fang, Leyuan},
  title     = {{GeoCoT}: Towards Reliable Remote Sensing Reasoning with Manifold Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20575--20585},
}
IMS3: Breaking Distributional Aggregation in Diffusion-Based Dataset Distillation: Chenru Wang,

Yunyi Chen,

Zijun Yang,

Joey Tianyi Zhou,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenru and Chen, Yunyi and Yang, Zijun and Zhou, Joey Tianyi and Zhang, Chi},
  title     = {{IMS3}: Breaking Distributional Aggregation in Diffusion-Based Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26667--26677},
}
PhenoYieldNet: Learning Crop-Aware Phenological Responses for Multi-Crop Yield Prediction: Yu Luo,

Xiaogang Zhu,

Shan Zeng,

Wei Xiang,

Thomas Francis Bishop,

Zhiyong Wang,

Kun Hu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yu and Zhu, Xiaogang and Zeng, Shan and Xiang, Wei and Bishop, Thomas Francis and Wang, Zhiyong and Hu, Kun},
  title     = {{PhenoYieldNet}: Learning Crop-Aware Phenological Responses for Multi-Crop Yield Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15870--15879},
}
ParTY: Part-Guidance for Expressive Text-to-Motion Synthesis: KunHo Heo,

SuYeon Kim,

Yonghyun Gwon,

Youngbin Kim,

MyeongAh Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR,
  author    = {Heo, KunHo and Kim, SuYeon and Gwon, Yonghyun and Kim, Youngbin and Cho, MyeongAh},
  title     = {{ParTY}: Part-Guidance for Expressive Text-to-Motion Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23549--23558},
}
BOP-ASK: Object-Interaction Reasoning for Vision-Language Models: Vineet Bhat,

Sungsu Kim,

Valts Blukis,

Greg Heinrich,

Prashanth Krishnamurthy,

Ramesh Karri,

Stan Birchfield,

Farshad Khorrami,

Jonathan Tremblay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhat_2026_CVPR,
  author    = {Bhat, Vineet and Kim, Sungsu and Blukis, Valts and Heinrich, Greg and Krishnamurthy, Prashanth and Karri, Ramesh and Birchfield, Stan and Khorrami, Farshad and Tremblay, Jonathan},
  title     = {{BOP-ASK}: Object-Interaction Reasoning for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16746--16757},
}
TIGER: A Unified Framework for Time, Images and Geo-location Retrieval: David G. Shatwell,

Sirnam Swetha,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shatwell_2026_CVPR,
  author    = {Shatwell, David G. and Swetha, Sirnam and Shah, Mubarak},
  title     = {{TIGER}: A Unified Framework for Time, Images and Geo-location Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23955--23965},
}
Large-scale Codec Avatars: The Unreasonable Effectiveness of Large-scale Avatar Pretraining: Junxuan Li,

Rawal Khirodkar,

Egor Zakharov,

Jihyun Lee,

Zhaoen Su,

Yuan Dong,

Julieta Martinez,

Kai Li,

Qingyang Tan,

Takaaki Shiratori,

Matthew Hu,

Peihong Guo,

Xuhua Huang,

Zhongshi Jiang,

Lingchen Yang,

Ariyan Zarei,

Marco Pesavento,

Yichen Xu,

Chengan He,

He Wen,

Giljoo Nam,

Teng Deng,

Wyatt Borsos,

Anjali Thakrar,

Jean-Charles Bazin,

Rinat Abdrashitov,

Carsten Stoll,

Ginés Hidalgo,

James Booth,

Lucy Wang,

Xiaowen Ma,

Yu Rong,

Sairanjith Thalanki,

Chen Cao,

Christian Häne,

Abhishek Kar,

Sofien Bouaziz,

Jason Saragih,

Yaser Sheikh,

Shunsuke Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junxuan and Khirodkar, Rawal and Zakharov, Egor and Lee, Jihyun and Su, Zhaoen and Dong, Yuan and Martinez, Julieta and Li, Kai and Tan, Qingyang and Shiratori, Takaaki and Hu, Matthew and Guo, Peihong and Huang, Xuhua and Jiang, Zhongshi and Yang, Lingchen and Zarei, Ariyan and Pesavento, Marco and Xu, Yichen and He, Chengan and Wen, He and Nam, Giljoo and Deng, Teng and Borsos, Wyatt and Thakrar, Anjali and Bazin, Jean-Charles and Abdrashitov, Rinat and Stoll, Carsten and Hidalgo, Gin{\'e}s and Booth, James and Wang, Lucy and Ma, Xiaowen and Rong, Yu and Thalanki, Sairanjith and Cao, Chen and H{\"a}ne, Christian and Kar, Abhishek and Bouaziz, Sofien and Saragih, Jason and Sheikh, Yaser and Saito, Shunsuke},
  title     = {Large-scale Codec Avatars: The Unreasonable Effectiveness of Large-scale Avatar Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18204--18215},
}
Grounded Latents for Entity-Centric 4D Scene Generation: Jinhyung Park,

Navyata Sanghvi,

Erica Weng,

Shawn Hunt,

Shinya Tanaka,

Hironobu Fujiyoshi,

Kris Kitani; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jinhyung and Sanghvi, Navyata and Weng, Erica and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris},
  title     = {Grounded Latents for Entity-Centric {4D} Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21420--21430},
}
Multimodal Learning on Low-Quality Data with Conformal Predictive Self-Calibration: Xun Jiang,

Yufan Gu,

Disen Hu,

Yuqing Hou,

Yazhou Yao,

Fumin Shen,

Heng Tao Shen,

Xing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xun and Gu, Yufan and Hu, Disen and Hou, Yuqing and Yao, Yazhou and Shen, Fumin and Shen, Heng Tao and Xu, Xing},
  title     = {Multimodal Learning on Low-Quality Data with Conformal Predictive Self-Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23041--23050},
}
DynamicsBoost: Dynamic Plausible Video Generation via Annotation-Free Continuation Preference Optimization: Jiaxing Li,

Jiepeng Wang,

Junyao Gao,

Yang Liu,

Eric Li,

Bo An,

Hao-Xiang Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiaxing and Wang, Jiepeng and Gao, Junyao and Liu, Yang and Li, Eric and An, Bo and Guo, Hao-Xiang},
  title     = {{DynamicsBoost}: Dynamic Plausible Video Generation via Annotation-Free Continuation Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20024--20033},
}
Hidden Dangers of Compositional Generation: Diagnosing Semantic Safety Failures in Text-to-Image Models: Haoming Yang,

Ke Ma,

Ligong Zhang,

Xiaojun Jia,

Yingfei Sun,

Qianqian Xu,

Qingming Huang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haoming and Ma, Ke and Zhang, Ligong and Jia, Xiaojun and Sun, Yingfei and Xu, Qianqian and Huang, Qingming},
  title     = {Hidden Dangers of Compositional Generation: Diagnosing Semantic Safety Failures in Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15700--15709},
}
iMontage: Unified, Versatile, Highly Dynamic Many-to-many Image Generation: Zhoujie Fu,

Xianfang Zeng,

Jinghong Lan,

Xinyao Liao,

Cheng Chen,

Junyi Chen,

Jiacheng Wei,

Wei Cheng,

Shiyu Liu,

Yunuo Chen,

Gang Yu,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Zhoujie and Zeng, Xianfang and Lan, Jinghong and Liao, Xinyao and Chen, Cheng and Chen, Junyi and Wei, Jiacheng and Cheng, Wei and Liu, Shiyu and Chen, Yunuo and Yu, Gang and Lin, Guosheng},
  title     = {{iMontage}: Unified, Versatile, Highly Dynamic Many-to-many Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16237--16247},
}
M4-SAM: Multi-Modal Mixture-of-Experts with Memory-Augmented SAM for RGB-D Video Salient Object Detection: Jiyuan Liu,

Jia Lin,

Xiaofei Zhou,

Runmin Cong,

Deyang Liu,

Zhi Liu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiyuan and Lin, Jia and Zhou, Xiaofei and Cong, Runmin and Liu, Deyang and Liu, Zhi},
  title     = {{M4-SAM}: Multi-Modal Mixture-of-Experts with Memory-Augmented {SAM} for {RGB-D} Video Salient Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24970--24979},
}
Circuit Mechanisms for Spatial Relation Generation in Diffusion Transformers: Binxu Wang,

Jingxuan Fan,

Xu Pan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Binxu and Fan, Jingxuan and Pan, Xu},
  title     = {Circuit Mechanisms for Spatial Relation Generation in Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23612--23621},
}
2-Shots in the Dark: Low-Light Denoising with Minimal Data Acquisition: Liying Lu,

Raphael Achddou,

Sabine Süsstrunk; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Liying and Achddou, Raphael and S{\"u}sstrunk, Sabine},
  title     = {2-Shots in the Dark: Low-Light Denoising with Minimal Data Acquisition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15496--15505},
}
Coded-E2LF: Coded Aperture Light Field Imaging from Events: Tomoya Tsuchida,

Keita Takahashi,

Chihiro Tsutake,

Toshiaki Fujii,

Hajime Nagahara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tsuchida_2026_CVPR,
  author    = {Tsuchida, Tomoya and Takahashi, Keita and Tsutake, Chihiro and Fujii, Toshiaki and Nagahara, Hajime},
  title     = {{Coded-E2LF}: Coded Aperture Light Field Imaging from Events},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19726--19736},
}
General Process Reward Modeling for Robotic Reinforcement Learning: Huajie Tan,

Sixiang Chen,

Yijie Xu,

Zixiao Wang,

Cheng Chi,

Yuheng Ji,

Yaoxu Lyu,

Zhongxia Zhao,

Xiansheng Chen,

Peterson Co,

Shaoxuan Xie,

Guocai Yao,

Pengwei Wang,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Huajie and Chen, Sixiang and Xu, Yijie and Wang, Zixiao and Chi, Cheng and Ji, Yuheng and Lyu, Yaoxu and Zhao, Zhongxia and Chen, Xiansheng and Co, Peterson and Xie, Shaoxuan and Yao, Guocai and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {General Process Reward Modeling for Robotic Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22412--22422},
}
Mistake Attribution: Fine-Grained Mistake Understanding in Egocentric Videos: Yayuan Li,

Aadit Jain,

Filippos Bellos,

Jason J. Corso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yayuan and Jain, Aadit and Bellos, Filippos and Corso, Jason J.},
  title     = {Mistake Attribution: Fine-Grained Mistake Understanding in Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23966--23976},
}
InfiniDepth: Arbitrary-Resolution and Fine-Grained Depth Estimation with Neural Implicit Fields: Hao Yu,

Haotong Lin,

Jiawei Wang,

Jiaxin Li,

Yida Wang,

Xueyang Zhang,

Yue Wang,

Xiaowei Zhou,

Ruizhen Hu,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Hao and Lin, Haotong and Wang, Jiawei and Li, Jiaxin and Wang, Yida and Zhang, Xueyang and Wang, Yue and Zhou, Xiaowei and Hu, Ruizhen and Peng, Sida},
  title     = {{InfiniDepth}: Arbitrary-Resolution and Fine-Grained Depth Estimation with Neural Implicit Fields},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26920--26930},
}
ReLaGS: Relational Language Gaussian Splatting: Yaxu Xie,

Abdalla Arafa,

Alireza Javanmardi,

Christen Millerdurai,

Jia Cheng Hu,

Shaoxiang Wang,

Alain Pagani,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yaxu and Arafa, Abdalla and Javanmardi, Alireza and Millerdurai, Christen and Hu, Jia Cheng and Wang, Shaoxiang and Pagani, Alain and Stricker, Didier},
  title     = {{ReLaGS}: Relational Language {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23826--23836},
}
UARE: A Unified Vision-Language Model for Image Quality Assessment, Restoration, and Enhancement: Weiqi Li,

Xuanyu Zhang,

Bin Chen,

Jingfen Xie,

Yan Wang,

Kexin Zhang,

Junlin Li,

Li Zhang,

Jian Zhang,

Shijie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiqi and Zhang, Xuanyu and Chen, Bin and Xie, Jingfen and Wang, Yan and Zhang, Kexin and Li, Junlin and Zhang, Li and Zhang, Jian and Zhao, Shijie},
  title     = {{UARE}: A Unified Vision-Language Model for Image Quality Assessment, Restoration, and Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22689--22702},
}
UVU: Improving Multimodal Understanding via Vision-Language Unified Autoregressive Paradigm: Zhehan Kan,

Xinghua Jiang,

Yanlin Liu,

Xiaochen Yang,

Zhixiang Wei,

Shifeng Liu,

Yubo Zhu,

Qingmin Liao,

Wenming Yang,

Xin Li,

Yinsong Liu,

Deqiang Jiang,

Xing Sun; [pdf] [supp]
[bibtex]
@InProceedings{Kan_2026_CVPR,
  author    = {Kan, Zhehan and Jiang, Xinghua and Liu, Yanlin and Yang, Xiaochen and Wei, Zhixiang and Liu, Shifeng and Zhu, Yubo and Liao, Qingmin and Yang, Wenming and Li, Xin and Liu, Yinsong and Jiang, Deqiang and Sun, Xing},
  title     = {{UVU}: Improving Multimodal Understanding via Vision-Language Unified Autoregressive Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26230--26239},
}
WPT: World-to-Policy Transfer via Online World Model Distillation: Guangfeng Jiang,

Yueru Luo,

Jun Liu,

Yi Huang,

Yiyao Zhu,

Zhan Qu,

Dave Zhenyu Chen,

Bingbing Liu,

Xu Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Guangfeng and Luo, Yueru and Liu, Jun and Huang, Yi and Zhu, Yiyao and Qu, Zhan and Chen, Dave Zhenyu and Liu, Bingbing and Yan, Xu},
  title     = {{WPT}: World-to-Policy Transfer via Online World Model Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17842--17852},
}
MaskDiME: Adaptive Masked Diffusion for Precise and Efficient Visual Counterfactual Explanations: Changlu Guo,

Anders Nymark Christensen,

Anders Bjorholm Dahl,

Morten Rieger Hannemose; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Changlu and Christensen, Anders Nymark and Dahl, Anders Bjorholm and Hannemose, Morten Rieger},
  title     = {{MaskDiME}: Adaptive Masked Diffusion for Precise and Efficient Visual Counterfactual Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24118--24128},
}
EmoDiffTalk: Emotion-aware Diffusion for Editable 3D Gaussian Talking Head: Chang Liu,

Tianjiao Jing,

Chengcheng Ma,

Xuanqi Zhou,

Zhengxuan Lian,

Qin Jin,

Hongliang Yuan,

Shi-Sheng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chang and Jing, Tianjiao and Ma, Chengcheng and Zhou, Xuanqi and Lian, Zhengxuan and Jin, Qin and Yuan, Hongliang and Huang, Shi-Sheng},
  title     = {{EmoDiffTalk}: Emotion-aware Diffusion for Editable {3D} {Gaussian} Talking Head},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18063--18073},
}
3D-Object Perception Transformer (3PT): Agastya Kalra,

Tim Salzmann,

Guy Stoppi,

Dmitrii Marin,

Rishav Agarwal,

Vage Taamazyan,

Martin Bokeloh,

Stefan Hinterstoisser,

Anton Boykov,

Alberto Dall'Olio,

Pravin Dangol,

Kartik Venkataraman,

Huaijin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Kalra_2026_CVPR,
  author    = {Kalra, Agastya and Salzmann, Tim and Stoppi, Guy and Marin, Dmitrii and Agarwal, Rishav and Taamazyan, Vage and Bokeloh, Martin and Hinterstoisser, Stefan and Boykov, Anton and Dall'Olio, Alberto and Dangol, Pravin and Venkataraman, Kartik and Chen, Huaijin},
  title     = {{3D}-Object Perception Transformer ({3PT})},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25777--25787},
}
InterPrior: Scaling Generative Control for Physics-Based Human-Object Interactions: Sirui Xu,

Samuel Schulter,

Morteza Ziyadi,

Xialin He,

Xiaohan Fei,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Sirui and Schulter, Samuel and Ziyadi, Morteza and He, Xialin and Fei, Xiaohan and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterPrior}: Scaling Generative Control for Physics-Based Human-Object Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23516--23527},
}
FastGaMer: Efficient GainMap Learning for Practical Inverse Tone Mapping: Yuanshen Guan,

Ruikang Xu,

Chang Chen,

Yinuo Liao,

Dehua Song,

Fenglong Song,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Yuanshen and Xu, Ruikang and Chen, Chang and Liao, Yinuo and Song, Dehua and Song, Fenglong and Xiong, Zhiwei},
  title     = {{FastGaMer}: Efficient {GainMap} Learning for Practical Inverse Tone Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22703--22712},
}
Looking Beyond the Window: Global-Local Aligned CLIP for Training-free Open-Vocabulary Semantic Segmentation: ByeongCheol Lee,

Hyun Seok Seong,

Sangeek Hyun,

Gilhan Park,

WonJun Moon,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, ByeongCheol and Seong, Hyun Seok and Hyun, Sangeek and Park, Gilhan and Moon, WonJun and Heo, Jae-Pil},
  title     = {Looking Beyond the Window: Global-Local Aligned {CLIP} for Training-free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27686--27696},
}
Expert-Teacher-Student Collaborative Learning for Domain Adaptive Object Detection: Yiming Cui,

Liang Li,

Haibing Yin,

Yuhan Gao,

Xichun Sheng,

Chenggang Yan; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Yiming and Li, Liang and Yin, Haibing and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang},
  title     = {Expert-Teacher-Student Collaborative Learning for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25557--25567},
}
SG-LoRA: Semantic-guided LoRA Parameters Generation: Miaoge Li,

Yang Chen,

Zhijie Rao,

Can Jiang,

Kang Wei,

Jingcai Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Miaoge and Chen, Yang and Rao, Zhijie and Jiang, Can and Wei, Kang and Guo, Jingcai},
  title     = {{SG-LoRA}: Semantic-guided {LoRA} Parameters Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22206--22216},
}
FunFact: Building Probabilistic Functional 3D Scene Graphs via Factor-Graph Reasoning: Zhengyu Fu,

René Zurbrügg,

Kaixian Qu,

Marc Pollefeys,

Marco Hutter,

Hermann Blum,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Zhengyu and Zurbr{\"u}gg, Ren{\'e} and Qu, Kaixian and Pollefeys, Marc and Hutter, Marco and Blum, Hermann and Bauer, Zuria},
  title     = {{FunFact}: Building Probabilistic Functional {3D} Scene Graphs via Factor-Graph Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23848--23858},
}
OpenDPR: Open-Vocabulary Change Detection via Vision-Centric Diffusion-Guided Prototype Retrieval for Remote Sensing Imagery: Qi Guo,

Jue Wang,

Yinhe Liu,

Yanfei Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Qi and Wang, Jue and Liu, Yinhe and Zhong, Yanfei},
  title     = {{OpenDPR}: Open-Vocabulary Change Detection via Vision-Centric Diffusion-Guided Prototype Retrieval for Remote Sensing Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20399--20409},
}
NeighborMAE: Exploiting Spatial Dependencies between Neighboring Earth Observation Images in Masked Autoencoders Pretraining: Liang Zeng,

Valerio Marsocci,

Wufan Zhao,

Andrea Nascetti,

Maarten Vergauwen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Liang and Marsocci, Valerio and Zhao, Wufan and Nascetti, Andrea and Vergauwen, Maarten},
  title     = {{NeighborMAE}: Exploiting Spatial Dependencies between Neighboring {Earth} Observation Images in Masked Autoencoders Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20597--20607},
}
A Cross-view Fusion Framework for Robust 6-DoF Grasp Pose Estimation: Kangjian Zhu,

Haobo Jiang,

Jianjun Qian,

Jin Xie; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kangjian and Jiang, Haobo and Qian, Jianjun and Xie, Jin},
  title     = {A Cross-view Fusion Framework for Robust {6-DoF} Grasp Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28052--28061},
}
MAD: Motion Appearance Decoupling for efficient Driving World Models: Ahmad Rahimi,

Valentin Gerard,

Eloi Zablocki,

Matthieu Cord,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahimi_2026_CVPR,
  author    = {Rahimi, Ahmad and Gerard, Valentin and Zablocki, Eloi and Cord, Matthieu and Alahi, Alexandre},
  title     = {{MAD}: Motion Appearance Decoupling for efficient Driving World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18364--18374},
}
Learning to Generate via Understanding: Understanding-Driven Intrinsic Rewarding for Unified Multimodal Models: Jiadong Pan,

Liang Li,

Yuxin Peng,

Yu-Ming Tang,

Shuohuan Wang,

Yu Sun,

Hua Wu,

Qingming Huang,

Haifeng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Jiadong and Li, Liang and Peng, Yuxin and Tang, Yu-Ming and Wang, Shuohuan and Sun, Yu and Wu, Hua and Huang, Qingming and Wang, Haifeng},
  title     = {Learning to Generate via Understanding: Understanding-Driven Intrinsic Rewarding for Unified Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22174--22184},
}
DARC: Dual Adjustment Reasoning with Counterfactuals for Trustworthy Chest X-ray Classification: Zhifang Liao,

Junhao Li,

HaoKang Ding,

Yucheng Song; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Zhifang and Li, Junhao and Ding, HaoKang and Song, Yucheng},
  title     = {{DARC}: Dual Adjustment Reasoning with Counterfactuals for Trustworthy Chest {X-ray} Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28234--28243},
}
Explaining Object Detectors via Collective Contribution of Pixels: Toshinori Yamauchi,

Hiroshi Kera,

Kazuhiko Kawamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamauchi_2026_CVPR,
  author    = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Explaining Object Detectors via Collective Contribution of Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17046--17056},
}
HUMAPS-4D: A Multimodal Dataset for HUman Motion Analysis with Physiological and Semantic informations: Matthieu Dabrowski,

Ouala Ben Jemaa,

Benjamin Allaert; [pdf] [supp]
[bibtex]
@InProceedings{Dabrowski_2026_CVPR,
  author    = {Dabrowski, Matthieu and Ben Jemaa, Ouala and Allaert, Benjamin},
  title     = {{HUMAPS-4D}: A Multimodal Dataset for {HUman} Motion Analysis with Physiological and Semantic informations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21188--21197},
}
CHIRP dataset: towards long-term, individual-level, behavioral monitoring of bird populations in the wild: Alex Hoi Hang Chan,

Neha Singhal,

Onur Kocahan,

Andrea Meltzer,

Saverio Lubrano,

Miyako H. Warrington,

Michael Griesser,

Fumihiro Kano,

Hemal Naik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chan_2026_CVPR,
  author    = {Chan, Alex Hoi Hang and Singhal, Neha and Kocahan, Onur and Meltzer, Andrea and Lubrano, Saverio and Warrington, Miyako H. and Griesser, Michael and Kano, Fumihiro and Naik, Hemal},
  title     = {{CHIRP} dataset: towards long-term, individual-level, behavioral monitoring of bird populations in the wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18428--18439},
}
MuM: Multi-View Masked Image Modeling for 3D Vision: David Nordström,

Johan Edstedt,

Fredrik Kahl,

Georg Bökman; [pdf] [supp]
[bibtex]
@InProceedings{Nordstrom_2026_CVPR,
  author    = {Nordstr{\"o}m, David and Edstedt, Johan and Kahl, Fredrik and B{\"o}kman, Georg},
  title     = {{MuM}: Multi-View Masked Image Modeling for {3D} Vision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21736--21747},
}
LightSplat: Fast and Memory-Efficient Open-Vocabulary 3D Scene Understanding in Five Seconds: Jaehun Bang,

Jinhyeok Kim,

Minji Kim,

Seungheon Jeong,

Kyungdon Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2026_CVPR,
  author    = {Bang, Jaehun and Kim, Jinhyeok and Kim, Minji and Jeong, Seungheon and Joo, Kyungdon},
  title     = {{LightSplat}: Fast and Memory-Efficient Open-Vocabulary {3D} Scene Understanding in Five Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19812--19821},
}
NEC-Diff: Noise-Robust Event-RAW Complementary Diffusion for Seeing Motion in Extreme Darkness: Haoyue Liu,

Jinghan Xu,

Luxin Feng,

Hanyu Zhou,

Haozhi Zhao,

Yi Chang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haoyue and Xu, Jinghan and Feng, Luxin and Zhou, Hanyu and Zhao, Haozhi and Chang, Yi and Yan, Luxin},
  title     = {{NEC-Diff}: Noise-Robust Event-{RAW} Complementary Diffusion for Seeing Motion in Extreme Darkness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22281--22290},
}
GeoSANE: Learning Geospatial Representations from Models, Not Data: Joëlle Hanna,

Damian Falk,

Stella X. Yu,

Damian Borth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hanna_2026_CVPR,
  author    = {Hanna, Jo{\"e}lle and Falk, Damian and Yu, Stella X. and Borth, Damian},
  title     = {{GeoSANE}: Learning Geospatial Representations from Models, Not Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27804--27814},
}
VidTAG: Temporally Aligned Video to GPS Geolocalization with Denoising Sequence Prediction at a Global Scale: Parth Parag Kulkarni,

Rohit Gupta,

Prakash Chandra Chhipa,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2026_CVPR,
  author    = {Kulkarni, Parth Parag and Gupta, Rohit and Chhipa, Prakash Chandra and Shah, Mubarak},
  title     = {{VidTAG}: Temporally Aligned Video to {GPS} Geolocalization with Denoising Sequence Prediction at a Global Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23977--23987},
}
MARCO: Navigating the Unseen Space of Semantic Correspondence: Claudia Cuttano,

Gabriele Trivigno,

Carlo Masone,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cuttano_2026_CVPR,
  author    = {Cuttano, Claudia and Trivigno, Gabriele and Masone, Carlo and Roth, Stefan},
  title     = {{MARCO}: Navigating the Unseen Space of Semantic Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21649--21658},
}
Realiz3D: 3D Generation Made Photorealistic via Domain-Aware Learning: Ido Sobol,

Kihyuk Sohn,

Yoav Blum,

Egor Zakharov,

Max Bluvstein,

Andrea Vedaldi,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sobol_2026_CVPR, author = {Sobol, Ido and Sohn, Kihyuk and Blum, Yoav and Zakharov, Egor and Bluvstein, Max and Vedaldi, Andrea and Litany, Or}, title = {Realiz3D: 3D Generation Made Photorealistic via Domain-Aware Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27072-27081} }
Brewing Stronger Features: Dual-Teacher Distillation for Multispectral Earth Observation: Filip Wolf,

Blaž Rolih,

Luka Čehovin Zajc; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wolf_2026_CVPR, author = {Wolf, Filip and Rolih, Bla\v{z} and Zajc, Luka \v{C}ehovin}, title = {Brewing Stronger Features: Dual-Teacher Distillation for Multispectral Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27815-27826} }
Differentiable Vector Quantization for Rate-Distortion Optimization of Generative Image Compression: Shiyin Jiang,

Wei Long,

Minghao Han,

Zhenghao Chen,

Ce Zhu,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Shiyin and Long, Wei and Han, Minghao and Chen, Zhenghao and Zhu, Ce and Gu, Shuhang}, title = {Differentiable Vector Quantization for Rate-Distortion Optimization of Generative Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14440-14450} }
Spatial-Spectral Residuals Informed Diffusion Neural Operator for Pan-sharpening: Jiahan Huang,

Ran Ran,

Junming Hou,

Zihao Chen,

Xiaofeng Cong,

Junling Li,

Liang-Jian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Jiahan and Ran, Ran and Hou, Junming and Chen, Zihao and Cong, Xiaofeng and Li, Junling and Deng, Liang-Jian}, title = {Spatial-Spectral Residuals Informed Diffusion Neural Operator for Pan-sharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23642-23651} }
AntiStyler: Defending Object Detection Models Against Adversarial Patch Attacks Using Style Removal: Idan Yankelev,

Edita Grolman,

Yarin Yerushalmi Levi,

Amit Giloni,

Omer Hofman,

Toshiya Shimizu,

Yuval Elovici,

Asaf Shabtai; [pdf] [supp]
[bibtex]
@InProceedings{Yankelev_2026_CVPR, author = {Yankelev, Idan and Grolman, Edita and Levi, Yarin Yerushalmi and Giloni, Amit and Hofman, Omer and Shimizu, Toshiya and Elovici, Yuval and Shabtai, Asaf}, title = {AntiStyler: Defending Object Detection Models Against Adversarial Patch Attacks Using Style Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27936-27945} }
EMGauss: Continuous Slice-to-3D Reconstruction via Dynamic Gaussian Modeling in Volume Electron Microscopy: Yumeng He,

Zanwei Zhou,

Yekun Zheng,

Chen Liang,

Yunbo Wang,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Yumeng and Zhou, Zanwei and Zheng, Yekun and Liang, Chen and Wang, Yunbo and Yang, Xiaokang}, title = {EMGauss: Continuous Slice-to-3D Reconstruction via Dynamic Gaussian Modeling in Volume Electron Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15606-15615} }
A Stitch in Time: Learning Procedural Workflow via Self-Supervised Plackett-Luce Ranking: Chengan Che,

Chao Wang,

Xinyue Chen,

Sophia Tsoka,

Luis C. Garcia-Peraza-Herrera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR, author = {Che, Chengan and Wang, Chao and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.}, title = {A Stitch in Time: Learning Procedural Workflow via Self-Supervised Plackett-Luce Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17000-17010} }
ConeSep: Cone-based Robust Noise-Unlearning Compositional Network for Composed Image Retrieval: Zixu Li,

Yupeng Hu,

Zhiwei Chen,

Mingyu Zhang,

Zhiheng Fu,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Zixu and Hu, Yupeng and Chen, Zhiwei and Zhang, Mingyu and Fu, Zhiheng and Nie, Liqiang}, title = {ConeSep: Cone-based Robust Noise-Unlearning Compositional Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16897-16909} }
Foundry: Distilling 3D Foundation Models for the Edge: Guillaume Letellier,

Siddharth Srivastava,

Frederic Jurie,

Gaurav Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Letellier_2026_CVPR, author = {Letellier, Guillaume and Srivastava, Siddharth and Jurie, Frederic and Sharma, Gaurav}, title = {Foundry: Distilling 3D Foundation Models for the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17194-17203} }
Seeing Through Blur: Tackling Defocus in Spike-Based Imaging: Xiantao Ma,

Siwei Dong,

Lin Zhu,

Lizhi Wang,

Hua Huang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Xiantao and Dong, Siwei and Zhu, Lin and Wang, Lizhi and Huang, Hua}, title = {Seeing Through Blur: Tackling Defocus in Spike-Based Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19676-19685} }
Generalized and Personalized Federated Learning with Black-Box Foundation Models via Orthogonal Transformations: Eun Gyung Kong,

Jewon Yeom,

Yonghoon Jeon,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR, author = {Kong, Eun Gyung and Yeom, Jewon and Jeon, Yonghoon and Kim, Taesup}, title = {Generalized and Personalized Federated Learning with Black-Box Foundation Models via Orthogonal Transformations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24567-24576} }
SVAgent: Storyline-guided Long Video Understanding via Cross-Modal Multi-Agent Collaboration: Zhongyu Yang,

Zuhao Yang,

Shuo Zhan,

Tan Yue,

Wei Pang,

Yingfang Yuan; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zhongyu and Yang, Zuhao and Zhan, Shuo and Yue, Tan and Pang, Wei and Yuan, Yingfang}, title = {SVAgent: Storyline-guided Long Video Understanding via Cross-Modal Multi-Agent Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24062-24072} }
Dynamic Exposure Burst Image Restoration: Woohyeok Kim,

Jaesung Rim,

Daeyeon Kim,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Woohyeok and Rim, Jaesung and Kim, Daeyeon and Cho, Sunghyun}, title = {Dynamic Exposure Burst Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15550-15560} }
RelightAnyone: A Generalized Relightable 3D Gaussian Head Model: Yingyan Xu,

Pramod Rao,

Sebastian Weiss,

Gaspard Zoss,

Markus Gross,

Christian Theobalt,

Marc Habermann,

Derek Bradley; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Yingyan and Rao, Pramod and Weiss, Sebastian and Zoss, Gaspard and Gross, Markus and Theobalt, Christian and Habermann, Marc and Bradley, Derek}, title = {RelightAnyone: A Generalized Relightable 3D Gaussian Head Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25258-25269} }
GIFT: Global Irreplaceability Frame Targeting for Efficient Video Understanding: Junpeng Ma,

Sashuai Zhou,

Guanghao Li,

Xin Gao,

Yue Cao,

Hengyu Zeng,

Yuxiang Yan,

Zhibin Wang,

Jun Song,

Bo Zheng,

Shanghang Zhang,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Junpeng and Zhou, Sashuai and Li, Guanghao and Gao, Xin and Cao, Yue and Zeng, Hengyu and Yan, Yuxiang and Wang, Zhibin and Song, Jun and Zheng, Bo and Zhang, Shanghang and Pu, Jian}, title = {GIFT: Global Irreplaceability Frame Targeting for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25610-25620} }
FEAT: Fashion Editing and Try-On from Any Design: Soye Kwon,

Keonyoung Lee,

Dahuin Jung,

Jaekoo Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR, author = {Kwon, Soye and Lee, Keonyoung and Jung, Dahuin and Lee, Jaekoo}, title = {FEAT: Fashion Editing and Try-On from Any Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22080-22089} }
TopoMesh: High-Fidelity Mesh Autoencoding via Topological Unification: Guan Luo,

Xiu Li,

Rui Chen,

Xuanyu Yi,

Jing Lin,

Chia Hao Chen,

Jiahang Liu,

Song-Hai Zhang,

Jianfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Guan and Li, Xiu and Chen, Rui and Yi, Xuanyu and Lin, Jing and Chen, Chia Hao and Liu, Jiahang and Zhang, Song-Hai and Zhang, Jianfeng}, title = {TopoMesh: High-Fidelity Mesh Autoencoding via Topological Unification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27082-27092} }
TALO: Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction: Fengyi Zhang,

Tianjun Zhang,

Kasra Khosoussi,

Zheng Zhang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Fengyi and Zhang, Tianjun and Khosoussi, Kasra and Zhang, Zheng and Huang, Zi and Luo, Yadan}, title = {TALO: Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21870-21879} }
VIMCAN: Visual-Inertial 3D Human Pose Estimation with Hybrid Mamba-Cross-Attention Network: Zepeng Yang,

Junxuan Bai,

Hao Li,

Ju Dai,

Junjun Pan,

Yongfeng Yin,

Bin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zepeng and Bai, Junxuan and Li, Hao and Dai, Ju and Pan, Junjun and Yin, Yongfeng and Li, Bin}, title = {VIMCAN: Visual-Inertial 3D Human Pose Estimation with Hybrid Mamba-Cross-Attention Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28458-28467} }
SGS-Intrinsic: Semantic-Invariant Gaussian Splatting for Sparse-View Indoor Inverse Rendering: Jiahao Niu,

Rongjia Zheng,

Wenju Xu,

Wei-Shi Zheng,

Qing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR, author = {Niu, Jiahao and Zheng, Rongjia and Xu, Wenju and Zheng, Wei-Shi and Zhang, Qing}, title = {SGS-Intrinsic: Semantic-Invariant Gaussian Splatting for Sparse-View Indoor Inverse Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26021-26030} }
COPYLENS: Towards Copyrighted Characters Infringement Detection via Copyright-Aware Prompt Learning: Yaoyu Jin,

Xiaochun Yang,

Hong Liu,

Leixia Wang,

Jian Li,

Rui Ding,

Bin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2026_CVPR, author = {Jin, Yaoyu and Yang, Xiaochun and Liu, Hong and Wang, Leixia and Li, Jian and Ding, Rui and Wang, Bin}, title = {COPYLENS: Towards Copyrighted Characters Infringement Detection via Copyright-Aware Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24492-24502} }
Text-Phase Synergy Network with Dual Priors for Unsupervised Cross-Domain Image Retrieval: Jing Yang,

Hui Xue,

Shipeng Zhu,

Pengfei Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Xue, Hui and Zhu, Shipeng and Fang, Pengfei}, title = {Text-Phase Synergy Network with Dual Priors for Unsupervised Cross-Domain Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23891-23900} }
Learning complete and explainable visual representations from itemized text supervision: Yiwei Lyu,

Chenhui Zhao,

Soumyanil Banerjee,

Shixuan Liu,

Akshay Rao,

Akhil Kondepudi,

Honglak Lee,

Todd C. Hollon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR, author = {Lyu, Yiwei and Zhao, Chenhui and Banerjee, Soumyanil and Liu, Shixuan and Rao, Akshay and Kondepudi, Akhil and Lee, Honglak and Hollon, Todd C.}, title = {Learning complete and explainable visual representations from itemized text supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21110-21120} }
Charge: A Comprehensive Novel View Synthesis Benchmark and Dataset to Bind Them All: Michal Nazarczuk,

Thomas Tanay,

Arthur Moreau,

Zhensong Zhang,

Eduardo Pérez-Pellitero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nazarczuk_2026_CVPR, author = {Nazarczuk, Michal and Tanay, Thomas and Moreau, Arthur and Zhang, Zhensong and P\'erez-Pellitero, Eduardo}, title = {Charge: A Comprehensive Novel View Synthesis Benchmark and Dataset to Bind Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15323-15333} }
Your Latent Mask is Wrong: Pixel-Equivalent Latent Compositing for Diffusion Models: Rowan Bradbury,

Dazhi Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bradbury_2026_CVPR, author = {Bradbury, Rowan and Zhong, Dazhi}, title = {Your Latent Mask is Wrong: Pixel-Equivalent Latent Compositing for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18630-18639} }
NAF: Zero-Shot Feature Upsampling via Neighborhood Attention Filtering: Loïck Chambon,

Paul Couairon,

Éloi Zablocki,

Alexandre Boulch,

Nicolas Thome,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chambon_2026_CVPR, author = {Chambon, Lo{\"\i}ck and Couairon, Paul and Zablocki, \'Eloi and Boulch, Alexandre and Thome, Nicolas and Cord, Matthieu}, title = {NAF: Zero-Shot Feature Upsampling via Neighborhood Attention Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26604-26613} }
AR2-4FV: Anchored Referring and Re-identification for Long-Term Grounding in Fixed-View Videos: Teng Yan,

Yihan Liu,

Jiongxu Chen,

Teng Wang,

Jiaqi Li,

Bingzhuo Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, Teng and Liu, Yihan and Chen, Jiongxu and Wang, Teng and Li, Jiaqi and Zhong, Bingzhuo}, title = {AR2-4FV: Anchored Referring and Re-identification for Long-Term Grounding in Fixed-View Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17568-17577} }
Sparse-View Localization via Online Neural 3D Regression: Ludvig Dillén,

Magnus Oskarsson,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Dillen_2026_CVPR, author = {Dill\'en, Ludvig and Oskarsson, Magnus and Larsson, Viktor}, title = {Sparse-View Localization via Online Neural 3D Regression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21794-21804} }
PhysGen: Physically Grounded 3D Shape Generation for Industrial Design: Yingxuan You,

Chen Zhao,

Hantao Zhang,

Ming Xu,

Pascal Fua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR, author = {You, Yingxuan and Zhao, Chen and Zhang, Hantao and Xu, Ming and Fua, Pascal}, title = {PhysGen: Physically Grounded 3D Shape Generation for Industrial Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27209-27218} }
OraPO: Oracle-educated Reinforcement Learning for Data-efficient and Factual Radiology Report Generation: Zhuoxiao Chen,

Hongyang Yu,

Ying Xu,

Yadan Luo,

Long Duong,

Yuan-Fang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuoxiao and Yu, Hongyang and Xu, Ying and Luo, Yadan and Duong, Long and Li, Yuan-Fang}, title = {OraPO: Oracle-educated Reinforcement Learning for Data-efficient and Factual Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28275-28287} }
PA-Attack: Guiding Gray-Box Attacks on LVLM Vision Encoders with Prototypes and Attention: Hefei Mei,

Zirui Wang,

Chang Xu,

Jianyuan Guo,

Minjing Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2026_CVPR, author = {Mei, Hefei and Wang, Zirui and Xu, Chang and Guo, Jianyuan and Dong, Minjing}, title = {PA-Attack: Guiding Gray-Box Attacks on LVLM Vision Encoders with Prototypes and Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15679-15688} }
Widget2Code: From Visual Widgets to UI Code via Multimodal LLMs: Houston H. Zhang,

Tao Zhang,

Baoze Lin,

Yuanqi Xue,

Yincheng Zhu,

Huan Liu,

Li Gu,

Linfeng Ye,

Ziqiang Wang,

Xinxin Zuo,

Yang Wang,

Yuanhao Yu,

Zhixiang Chi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Houston H. and Zhang, Tao and Lin, Baoze and Xue, Yuanqi and Zhu, Yincheng and Liu, Huan and Gu, Li and Ye, Linfeng and Wang, Ziqiang and Zuo, Xinxin and Wang, Yang and Yu, Yuanhao and Chi, Zhixiang}, title = {Widget2Code: From Visual Widgets to UI Code via Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20293-20302} }
Partial Weakly-Supervised Oriented Object Detection: Mingxin Liu,

Peiyuan Zhang,

Yuan Liu,

Wei Zhang,

Yue Zhou,

Ning Liao,

Ziyang Gong,

Junwei Luo,

Zhirui Wang,

Yi Yu,

Xue Yang; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Mingxin and Zhang, Peiyuan and Liu, Yuan and Zhang, Wei and Zhou, Yue and Liao, Ning and Gong, Ziyang and Luo, Junwei and Wang, Zhirui and Yu, Yi and Yang, Xue}, title = {Partial Weakly-Supervised Oriented Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27644-27654} }
When Robots Should Say "I Don't Know": Benchmarking Abstention in Embodied Question Answering: Tao Wu,

Chuhao Zhou,

Guangyu Zhao,

Haozhi Cao,

Yewen Pu,

Jianfei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Zhou, Chuhao and Zhao, Guangyu and Cao, Haozhi and Pu, Yewen and Yang, Jianfei}, title = {When Robots Should Say ``I Don't Know'': Benchmarking Abstention in Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15266-15275} }
FMPose3D: monocular 3D pose estimation via flow matching: Ti Wang,

Xiaohang Yu,

Mackenzie Weygandt Mathis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ti and Yu, Xiaohang and Mathis, Mackenzie Weygandt}, title = {FMPose3D: monocular 3D pose estimation via flow matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14669-14679} }
VABench: A Comprehensive Benchmark for Audio-Video Generation: Daili Hua,

Xizhi Wang,

Bohan Zeng,

Xinyi Huang,

Hao Liang,

Junbo Niu,

Xinlong Chen,

Quanqing Xu,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2026_CVPR, author = {Hua, Daili and Wang, Xizhi and Zeng, Bohan and Huang, Xinyi and Liang, Hao and Niu, Junbo and Chen, Xinlong and Xu, Quanqing and Zhang, Wentao}, title = {VABench: A Comprehensive Benchmark for Audio-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23345-23355} }
A Sanity Check for Multi-In-Domain Face Forgery Detection in the Real World: Jikang Cheng,

Renye Yan,

Zhiyuan Yan,

Yaozhong Gan,

Xueyi Zhang,

Zhongyuan Wang,

Wei Peng,

Ling Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR, author = {Cheng, Jikang and Yan, Renye and Yan, Zhiyuan and Gan, Yaozhong and Zhang, Xueyi and Wang, Zhongyuan and Peng, Wei and Liang, Ling}, title = {A Sanity Check for Multi-In-Domain Face Forgery Detection in the Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21306-21315} }
ProPhy: Progressive Physical Alignment for Dynamic World Simulation: Zijun Wang,

Panwen Hu,

Jing Wang,

Terry Jingchen Zhang,

Yuhao Cheng,

Long Chen,

Yiqiang Yan,

Zutao Jiang,

Hanhui Li,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Zijun and Hu, Panwen and Wang, Jing and Zhang, Terry Jingchen and Cheng, Yuhao and Chen, Long and Yan, Yiqiang and Jiang, Zutao and Li, Hanhui and Liang, Xiaodan}, title = {ProPhy: Progressive Physical Alignment for Dynamic World Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14492-14501} }
Semantic Scale Space: A Framework for Controllable Image Abstraction: Kazu Mishiba; [pdf] [supp]
[bibtex]
@InProceedings{Mishiba_2026_CVPR, author = {Mishiba, Kazu}, title = {Semantic Scale Space: A Framework for Controllable Image Abstraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17367-17376} }
x^2-Fusion: Cross-Modality and Cross-Dimension Flow Estimation in Event Edge Space: Ruishan Guo,

Ciyu Ruan,

Haoyang Wang,

Zihang Gong,

Jingao Xu,

Xinlei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Ruishan and Ruan, Ciyu and Wang, Haoyang and Gong, Zihang and Xu, Jingao and Chen, Xinlei}, title = {x{\textasciicircum}2-Fusion: Cross-Modality and Cross-Dimension Flow Estimation in Event Edge Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15145-15155} }
Block-based Learned Image Compression without Blocking Artifacts: Jong Wook Kim,

Suyong Bahk,

TaeHwa Lee,

HyunDong Cho,

Donghyun Kim,

Sung-Chang Lim,

Jin Soo Choi,

Hui Yong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Jong Wook and Bahk, Suyong and Lee, TaeHwa and Cho, HyunDong and Kim, Donghyun and Lim, Sung-Chang and Choi, Jin Soo and Kim, Hui Yong}, title = {Block-based Learned Image Compression without Blocking Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19330-19338} }
FRAMER: Frequency-Aligned Self-Distillation with Adaptive Modulation Leveraging Diffusion Priors for Real-World Image Super-Resolution: Seungho Choi,

Jeahun Sung,

Jihyong Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR, author = {Choi, Seungho and Sung, Jeahun and Oh, Jihyong}, title = {FRAMER: Frequency-Aligned Self-Distillation with Adaptive Modulation Leveraging Diffusion Priors for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23451-23461} }
SoccerMaster: A Vision Foundation Model for Soccer Understanding: Haolin Yang,

Jiayuan Rao,

Haoning Wu,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Haolin and Rao, Jiayuan and Wu, Haoning and Xie, Weidi}, title = {SoccerMaster: A Vision Foundation Model for Soccer Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21549-21560} }
Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation: Kailing Li,

Tianwen Qian,

Lijin Yang,

Yuqian Fu,

Jingyu Gong,

Xiaoling Wang,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Kailing and Qian, Tianwen and Yang, Lijin and Fu, Yuqian and Gong, Jingyu and Wang, Xiaoling and He, Liang}, title = {Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15243-15252} }
VSRELL: A Simple Baseline for Video Super-Resolution and Enhancement in Low-Light Environment: Yanming Hui,

Fanhua Shang,

Hongying Liu,

Ben Wang,

Zhenwei Zhang,

Liang Wan,

Wei Feng,

Tong Xue,

Bingqin Lv; [pdf] [supp]
[bibtex]
@InProceedings{Hui_2026_CVPR, author = {Hui, Yanming and Shang, Fanhua and Liu, Hongying and Wang, Ben and Zhang, Zhenwei and Wan, Liang and Feng, Wei and Xue, Tong and Lv, Bingqin}, title = {VSRELL: A Simple Baseline for Video Super-Resolution and Enhancement in Low-Light Environment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16345-16354} }
RoMo: A Large-Scale, Richly Organized Dataset and Semantic Taxonomy for Human Motion Generation: Jiahao Zhang,

Joseph Liu,

Young-Yoon Lee,

Seonghyeon Moon,

Victor Zordan,

Guy Tevet,

C. Karen Liu,

Stephen Gould,

Oren Jacob,

Haomiao Jiang,

Mubbasir Kapadia,

Yizhak Ben-Shabat; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiahao and Liu, Joseph and Lee, Young-Yoon and Moon, Seonghyeon and Zordan, Victor and Tevet, Guy and Liu, C. Karen and Gould, Stephen and Jacob, Oren and Jiang, Haomiao and Kapadia, Mubbasir and Ben-Shabat, Yizhak}, title = {RoMo: A Large-Scale, Richly Organized Dataset and Semantic Taxonomy for Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16408-16419} }
Stronger Normalization-Free Transformers: Mingzhi Chen,

Taiming Lu,

Jiachen Zhu,

Mingjie Sun,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Mingzhi and Lu, Taiming and Zhu, Jiachen and Sun, Mingjie and Liu, Zhuang}, title = {Stronger Normalization-Free Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27418-27428} }
Residual Decoder Adapter: ID-Preserving Tokenizer Adaption for Autoregressive Text Rendering: Dongxing Mao,

Alex Jinpeng Wang,

Jiahao Tang,

Kevin Qinghong Lin,

Linjie Li,

Zhengyuan Yang,

Lijuan Wang,

Min Li,

Jingru Tan; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR, author = {Mao, Dongxing and Wang, Alex Jinpeng and Tang, Jiahao and Lin, Kevin Qinghong and Li, Linjie and Yang, Zhengyuan and Wang, Lijuan and Li, Min and Tan, Jingru}, title = {Residual Decoder Adapter: ID-Preserving Tokenizer Adaption for Autoregressive Text Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22017-22027} }
A More Word-like Image Tokenization for MLLMs: Hyun Lee,

Hyemin Jeong,

Yejin Kim,

Hyungwook Choi,

Hyunsoo Cho,

Soo Kyung Kim,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Hyun and Jeong, Hyemin and Kim, Yejin and Choi, Hyungwook and Cho, Hyunsoo and Kim, Soo Kyung and Lee, Joonseok}, title = {A More Word-like Image Tokenization for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17641-17650} }
Turning Pre-Trained Vision Transformers into End-to-End Histopathology Whole Slide Image Models for Survival Prediction: Jiawen Li,

Jiali Hu,

Xitong Ling,

Renao Yan,

Yuxuan Chen,

Tian Guan,

Yonghong He; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jiawen and Hu, Jiali and Ling, Xitong and Yan, Renao and Chen, Yuxuan and Guan, Tian and He, Yonghong}, title = {Turning Pre-Trained Vision Transformers into End-to-End Histopathology Whole Slide Image Models for Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21046-21056} }
CARLoS: Retrieval via Concise Assessment Representation of LoRAs at Scale: Shahar Sarfaty,

Adi Haviv,

Uri Hacohen,

Niva Elkin-Koren,

Roi Livni,

Amit H. Bermano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarfaty_2026_CVPR, author = {Sarfaty, Shahar and Haviv, Adi and Hacohen, Uri and Elkin-Koren, Niva and Livni, Roi and Bermano, Amit H.}, title = {CARLoS: Retrieval via Concise Assessment Representation of LoRAs at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23922-23932} }
Dual-Agent Reinforcement Learning for Adaptive and Cost-Aware Visual-Inertial Odometry: Feiyang Pan,

Shenghe Zheng,

Chunyan Yin,

Guangbin Dou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Feiyang and Zheng, Shenghe and Yin, Chunyan and Dou, Guangbin}, title = {Dual-Agent Reinforcement Learning for Adaptive and Cost-Aware Visual-Inertial Odometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24885-24894} }
FastHybrid: Accelerating Hybrid Autoregressive Image Generation with Lookahead and Guided Decoding: Zhengguo Jiang,

Fang Zhang,

Yongxiang Hua,

Bocheng Li,

Wentao Zhang,

Linli Xu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhengguo and Zhang, Fang and Hua, Yongxiang and Li, Bocheng and Zhang, Wentao and Xu, Linli}, title = {FastHybrid: Accelerating Hybrid Autoregressive Image Generation with Lookahead and Guided Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23204-23214} }
Towards Balanced Multi-Modal Learning in 3D Human Pose Estimation: Mengshi Qi,

Jiaxuan Peng,

Xianlin Zhang,

Huadong Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Mengshi and Peng, Jiaxuan and Zhang, Xianlin and Ma, Huadong},
  title     = {Towards Balanced Multi-Modal Learning in {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21231--21241},
}
Best Segmentation Buddies for Image-Shape Correspondence: Itai Lang,

Dongwei Lyu,

Dale Decatur,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lang_2026_CVPR,
  author    = {Lang, Itai and Lyu, Dongwei and Decatur, Dale and Hanocka, Rana},
  title     = {Best Segmentation Buddies for Image-Shape Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20499--20510},
}
SynthRGB-T: Language-Vision Guided Image Translation for Diversity Synthesis: Jiangang Ding,

Yiquan Du,

Pengxiang Li,

Lili Pei,

Yuanlin Zhao,

Wei Li; [pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Jiangang and Du, Yiquan and Li, Pengxiang and Pei, Lili and Zhao, Yuanlin and Li, Wei},
  title     = {{SynthRGB-T}: Language-Vision Guided Image Translation for Diversity Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17259--17269},
}
Few-Shot Incremental 3D Object Detection in Dynamic Indoor Environments: Yun Zhu,

Jianjun Qian,

Jian Yang,

Jin Xie,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yun and Qian, Jianjun and Yang, Jian and Xie, Jin and Zhao, Na},
  title     = {Few-Shot Incremental {3D} Object Detection in Dynamic Indoor Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18786--18795},
}
DDSF: Robust Few-Shot Learning via Disentangled Subspaces with Determinantal Point Process: Xulun Ye,

Yifan Mei,

Kun Zhou,

Zelei Wu,

Jieyu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Mei, Yifan and Zhou, Kun and Wu, Zelei and Zhao, Jieyu},
  title     = {{DDSF}: Robust Few-Shot Learning via Disentangled Subspaces with Determinantal Point Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19591--19601},
}
CATNet: Collaborative Alignment and Transformation Network for Cooperative Perception: Gong Chen,

Chaokun Zhang,

Tao Tang,

Pengcheng Lv,

Feng Li,

Xin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Gong and Zhang, Chaokun and Tang, Tao and Lv, Pengcheng and Li, Feng and Xie, Xin},
  title     = {{CATNet}: Collaborative Alignment and Transformation Network for Cooperative Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18724--18733},
}
Expanding Spatial and Temporal Context for Robotic Imitation Learning With Scene Graphs: Jianing Qian,

Qinhe Peng,

Emmanuel Panov,

Leonor Fermoselle,

Dinesh Jayaraman,

Bernadette Bucher,

Tarik Kelestemur; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Jianing and Peng, Qinhe and Panov, Emmanuel and Fermoselle, Leonor and Jayaraman, Dinesh and Bucher, Bernadette and Kelestemur, Tarik},
  title     = {Expanding Spatial and Temporal Context for Robotic Imitation Learning With Scene Graphs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28010--28020},
}
Towards Training-free Scene Text Editing: Yubo Li,

Xugong Qin,

Peng Zhang,

Hailun Lin,

Gangyan Zeng,

Kexin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yubo and Qin, Xugong and Zhang, Peng and Lin, Hailun and Zeng, Gangyan and Zhang, Kexin},
  title     = {Towards Training-free Scene Text Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15291--15301},
}
Prototype-as-Prompt: Multimodal Sentiment Prototypes Endowing Large Language Models the Capability to Perform Multimodal Sentiment Analysis: Xianbing Zhao,

Lan Luo,

Hengyang Lu,

Buzhou Tang; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xianbing and Luo, Lan and Lu, Hengyang and Tang, Buzhou},
  title     = {Prototype-as-Prompt: Multimodal Sentiment Prototypes Endowing Large Language Models the Capability to Perform Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23010--23020},
}
TROPHIES: Temporal Reconstruction of Places, Humans, and Cameras from Multi-view Videos: Jinpeng Liu,

Yukang Xu,

Yutong Li,

Xingyu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinpeng and Xu, Yukang and Li, Yutong and Liu, Xingyu},
  title     = {{TROPHIES}: Temporal Reconstruction of Places, Humans, and Cameras from Multi-view Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21154--21164},
}
SVHalluc: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models: Chenshuang Zhang,

Kyeong Seon Kim,

Chengxin Liu,

Tae-Hyun Oh; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chenshuang and Kim, Kyeong Seon and Liu, Chengxin and Oh, Tae-Hyun},
  title     = {{SVHalluc}: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25304--25314},
}
Modeling the Visual Ambiguity of Human Sketches: Yang Zhou,

Ping Ni,

Jin Wang,

Senyun Jia,

Jingdan Yan,

Kaixiang Huang,

Guodong Lu,

Jingru Yang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yang and Ni, Ping and Wang, Jin and Jia, Senyun and Yan, Jingdan and Huang, Kaixiang and Lu, Guodong and Yang, Jingru and He, Shengfeng},
  title     = {Modeling the Visual Ambiguity of Human Sketches},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16876--16886},
}
SGAD-SLAM: Splatting Gaussians at Adjusted Depth for Better Radiance Fields in RGBD SLAM: Pengchong Hu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Pengchong and Han, Zhizhong},
  title     = {{SGAD-SLAM}: Splatting {Gaussians} at Adjusted Depth for Better Radiance Fields in {RGBD} {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18934--18945},
}
YieldSAT: A Multimodal Benchmark Dataset for High-Resolution Crop Yield Prediction: Miro Miranda,

Deepak Pathak,

Patrick Helber,

Benjamin Bischke,

Hiba Najjar,

Francisco Mena,

Cristhian Sanchez,

Akshay Pai,

Diego Arenas,

Matias Valdenegro-Toro,

Marcela Charfuelan,

Marlon Nuske,

Andreas Dengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miranda_2026_CVPR,
  author    = {Miranda, Miro and Pathak, Deepak and Helber, Patrick and Bischke, Benjamin and Najjar, Hiba and Mena, Francisco and Sanchez, Cristhian and Pai, Akshay and Arenas, Diego and Valdenegro-Toro, Matias and Charfuelan, Marcela and Nuske, Marlon and Dengel, Andreas},
  title     = {{YieldSAT}: A Multimodal Benchmark Dataset for High-Resolution Crop Yield Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22920--22930},
}
FrankenMotion: Part-level Human Motion Generation and Composition: Chuqiao Li,

Xianghui Xie,

Yong Cao,

Andreas Geiger,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chuqiao and Xie, Xianghui and Cao, Yong and Geiger, Andreas and Pons-Moll, Gerard},
  title     = {{FrankenMotion}: Part-level Human Motion Generation and Composition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16420--16431},
}
Disentanglement-wise Image Dehazing through Cross-Domain Manifold Consensus: Tianyi Lyu,

Mingye Ju,

Kai-Kuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Tianyi and Ju, Mingye and Ma, Kai-Kuang},
  title     = {Disentanglement-wise Image Dehazing through Cross-Domain Manifold Consensus},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22733--22743},
}
Revisiting Sparsity Constraint Under High-Rank Property in Partial Multi-Label Learning: Chongjie Si,

Yidan Cui,

Fuchao Yang,

Wei Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Si_2026_CVPR,
  author    = {Si, Chongjie and Cui, Yidan and Yang, Fuchao and Shen, Wei},
  title     = {Revisiting Sparsity Constraint Under High-Rank Property in Partial Multi-Label Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17724--17733},
}
One-Step Diffusion Transformer for Controllable Real-World Image Super-Resolution: Yushun Fang,

Yuxiang Chen,

Shibo Yin,

Qiang Hu,

Jiangchao Yao,

Ya Zhang,

Xiaoyun Zhang,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Yushun and Chen, Yuxiang and Yin, Shibo and Hu, Qiang and Yao, Jiangchao and Zhang, Ya and Zhang, Xiaoyun and Wang, Yanfeng},
  title     = {One-Step Diffusion Transformer for Controllable Real-World Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23440--23450},
}
Reconstructing Spiking Neural Networks Using a Single Neuron with Autapses: Wuque Cai,

Hongze Sun,

Quan Tang,

Shifeng Mao,

Zhenxing Wang,

Jiayi He,

Duo Chen,

Dezhong Yao,

Daqing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Wuque and Sun, Hongze and Tang, Quan and Mao, Shifeng and Wang, Zhenxing and He, Jiayi and Chen, Duo and Yao, Dezhong and Guo, Daqing},
  title     = {Reconstructing Spiking Neural Networks Using a Single Neuron with Autapses},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20283--20292},
}
SARL-STG: A Spatially Aware Reinforcement Learning Framework for Refining MLLMs in Spatio-Temporal Video Grounding: Hong Gao,

Xiangkai Xu,

Bin Zhong,

Junjie Yin,

Fangyu Kang,

Yutong Xu,

Xiugang Dong,

Xurui Gao,

Min-Ling Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hong and Xu, Xiangkai and Zhong, Bin and Yin, Junjie and Kang, Fangyu and Xu, Yutong and Dong, Xiugang and Gao, Xurui and Zhang, Min-Ling},
  title     = {{SARL-STG}: A Spatially Aware Reinforcement Learning Framework for Refining {MLLMs} in Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24630--24639},
}
ColorFLUX: A Structure-Color Decoupling Framework for Old Photo Colorization: Bingchen Li,

Zhixin Wang,

Fan Li,

Jiaqi Xu,

Jiaming Guo,

Renjing Pei,

Xin Li,

Zhibo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingchen and Wang, Zhixin and Li, Fan and Xu, Jiaqi and Guo, Jiaming and Pei, Renjing and Li, Xin and Chen, Zhibo},
  title     = {{ColorFLUX}: A Structure-Color Decoupling Framework for Old Photo Colorization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15574--15584},
}
Cross-Subject EEG-to-Video Reconstruction and Beyond: Runduo Han,

Hongchen Tan; [pdf]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Runduo and Tan, Hongchen},
  title     = {Cross-Subject {EEG}-to-Video Reconstruction and Beyond},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23294--23303},
}
MeteorPred: A Meteorological Multimodal Large Model and Dataset for Severe Weather Event Prediction: Shuo Tang,

Jian Xu,

Jiadong Zhang,

Yi Chen,

Qizhao Jin,

Lingdong Shen,

Chenglin Liu,

Shiming Xiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Shuo and Xu, Jian and Zhang, Jiadong and Chen, Yi and Jin, Qizhao and Shen, Lingdong and Liu, Chenglin and Xiang, Shiming},
  title     = {{MeteorPred}: A Meteorological Multimodal Large Model and Dataset for Severe Weather Event Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22910--22919},
}
SkyReels-Text: Fine-Grained Font-Controllable Text Editing for Poster Design: Yunjie Yu,

Jingchen Wu,

Junchen Zhu,

Chunze Lin,

Guibin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yunjie and Wu, Jingchen and Zhu, Junchen and Lin, Chunze and Chen, Guibin},
  title     = {{SkyReels-Text}: Fine-Grained Font-Controllable Text Editing for Poster Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14875--14884},
}
SIGMA: A Physics-Based Benchmark for Gas Chimney Understanding in Seismic Images: Bao Truong,

Quang Nguyen,

Baoru Huang,

Jinpei Han,

Van Nguyen,

Ngan Le,

Minh-Tan Pham,

Doan Huy Hien,

Anh Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Truong_2026_CVPR,
  author    = {Truong, Bao and Nguyen, Quang and Huang, Baoru and Han, Jinpei and Nguyen, Van and Le, Ngan and Pham, Minh-Tan and Hien, Doan Huy and Nguyen, Anh},
  title     = {{SIGMA}: A Physics-Based Benchmark for Gas Chimney Understanding in Seismic Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20542--20552},
}
Keep It Frozen: Domain-Routed Conditional Residual Modulation for Multi-Domain Vision Transformers: Ufaq Khan,

Umair Nawaz,

Massimo Caputo,

Muhammad Bilal,

Junaid Qadir,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2026_CVPR,
  author    = {Khan, Ufaq and Nawaz, Umair and Caputo, Massimo and Bilal, Muhammad and Qadir, Junaid and Khan, Muhammad Haris},
  title     = {Keep It Frozen: Domain-Routed Conditional Residual Modulation for Multi-Domain Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21016--21025},
}
WorldMM: Dynamic Multimodal Memory Agent for Long Video Reasoning: Woongyeong Yeo,

Kangsan Kim,

Jaehong Yoon,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2026_CVPR,
  author    = {Yeo, Woongyeong and Kim, Kangsan and Yoon, Jaehong and Hwang, Sung Ju},
  title     = {{WorldMM}: Dynamic Multimodal Memory Agent for Long Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25599--25609},
}
GaussianPile: A Unified Sparse Gaussian Splatting Framework for Slice-based Volumetric Reconstruction: Di Kong,

Yikai Wang,

Wenjie Guo,

Yifan Bu,

Boya Zhang,

Yuexin Duan,

Xiawei Yue,

Wenbiao Du,

Yiman Zhong,

Yuwen Chen,

Cheng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Di and Wang, Yikai and Guo, Wenjie and Bu, Yifan and Zhang, Boya and Duan, Yuexin and Yue, Xiawei and Du, Wenbiao and Zhong, Yiman and Chen, Yuwen and Ma, Cheng},
  title     = {{GaussianPile}: A Unified Sparse {Gaussian} Splatting Framework for Slice-based Volumetric Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19022--19032},
}
Grid Distillation: Compositional Image Distillation via Structured Generative Grids: Biplab Ch Das,

Shouvik Das,

Viswanath Gopalakrishnan; [pdf] [supp]
[bibtex]
@InProceedings{Das_2026_CVPR,
  author    = {Das, Biplab Ch and Das, Shouvik and Gopalakrishnan, Viswanath},
  title     = {Grid Distillation: Compositional Image Distillation via Structured Generative Grids},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19644--19653},
}
SpotEdit: Selective Region Editing in Diffusion Transformers: Zhibin Qin,

Zhenxiong Tan,

Zeqing Wang,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Zhibin and Tan, Zhenxiong and Wang, Zeqing and Liu, Songhua and Wang, Xinchao},
  title     = {{SpotEdit}: Selective Region Editing in Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18683--18692},
}
EgoPoseFormer v2: Accurate Egocentric Human Motion Estimation for AR/VR: Zhenyu Li,

Sai Kumar Dwivedi,

Filip Maric,

Carlos Chacón,

Nadine Bertsch,

Filippo Arcadu,

Tomas Hodan,

Michael Ramamonjisoa,

Peter Wonka,

Amy Zhao,

Robin Kips,

Cem Keskin,

Anastasia Tkach,

Chenhongyi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhenyu and Dwivedi, Sai Kumar and Maric, Filip and Chac{\'o}n, Carlos and Bertsch, Nadine and Arcadu, Filippo and Hodan, Tomas and Ramamonjisoa, Michael and Wonka, Peter and Zhao, Amy and Kips, Robin and Keskin, Cem and Tkach, Anastasia and Yang, Chenhongyi},
  title     = {{EgoPoseFormer} v2: Accurate Egocentric Human Motion Estimation for {AR/VR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21121--21131},
}
Skullptor: High Fidelity 3D Head Reconstruction in Seconds with Multi-View Normal Prediction: Noé Artru,

Rukhshanda Hussain,

Emeline Got,

Alexandre Messier,

David B. Lindell,

Abdallah Dib; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Artru_2026_CVPR,
  author    = {Artru, No{\'e} and Hussain, Rukhshanda and Got, Emeline and Messier, Alexandre and Lindell, David B. and Dib, Abdallah},
  title     = {Skullptor: High Fidelity {3D} Head Reconstruction in Seconds with Multi-View Normal Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25248--25257},
}
ODGS-SLAM: Omnidirectional Gaussian Splatting SLAM: Stefan Spiss,

Joey Hieronimy,

Marcel Ritter,

Matthias Harders; [pdf] [supp]
[bibtex]
@InProceedings{Spiss_2026_CVPR,
  author    = {Spiss, Stefan and Hieronimy, Joey and Ritter, Marcel and Harders, Matthias},
  title     = {{ODGS-SLAM}: Omnidirectional {Gaussian} Splatting {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26114--26123},
}
Factorize, Reconstruct, Enhance: A Unified Framework for Multimodal Sentiment Analysis: Zhilu Yang,

Mingcheng Li; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhilu and Li, Mingcheng},
  title     = {Factorize, Reconstruct, Enhance: A Unified Framework for Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15860--15869},
}
GS-ASM: 2DGS-Supervised Active Stereo Matching: Zhengling Wu,

Rongfeng Lu,

Quan Chen,

Longjian Zeng,

Ming Lu,

Yaoqi Sun,

Yahong Chen,

Baofeng Ji,

Chenggang Yan; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhengling and Lu, Rongfeng and Chen, Quan and Zeng, Longjian and Lu, Ming and Sun, Yaoqi and Chen, Yahong and Ji, Baofeng and Yan, Chenggang},
  title     = {{GS-ASM}: {2DGS}-Supervised Active Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26888--26898},
}
EV-CGNet: Co-visible Focused 3D-guided 2D Event Keypoint Detection Network: Yuan Gao,

Tianle Ding,

Yuqing Zhu,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuan and Ding, Tianle and Zhu, Yuqing and Zhang, Tianzhu},
  title     = {{EV-CGNet}: Co-visible Focused {3D}-guided {2D} Event Keypoint Detection Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15105--15114},
}
Meta-FC: Meta-Learning with Feature Consistency for Robust and Generalizable Watermarking: Yuheng Li,

Weitong Chen,

Chengcheng Zhu,

Jiale Zhang,

Chunpeng Ge,

Di Wu,

Guodong Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuheng and Chen, Weitong and Zhu, Chengcheng and Zhang, Jiale and Ge, Chunpeng and Wu, Di and Long, Guodong},
  title     = {{Meta-FC}: Meta-Learning with Feature Consistency for Robust and Generalizable Watermarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17420--17429},
}
Depth Peeling for High-Fidelity Gaussian-Enhanced Surfel Rendering: Keyang Ye,

Hongzhi Wu,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Keyang and Wu, Hongzhi and Zhou, Kun},
  title     = {Depth Peeling for High-Fidelity {Gaussian}-Enhanced Surfel Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22561--22570},
}
Hierarchical Point-Patch Fusion with Adaptive Patch Codebook for 3D Shape Anomaly Detection: Xueyang Kang,

Zizhao Li,

Tian Lan,

Dong Gong,

Kourosh Khoshelham,

Liangliang Nan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Xueyang and Li, Zizhao and Lan, Tian and Gong, Dong and Khoshelham, Kourosh and Nan, Liangliang},
  title     = {Hierarchical Point-Patch Fusion with Adaptive Patch Codebook for {3D} Shape Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24258--24267},
}
Confidence-Guided Multi-Scale Aggregation for Sparse-View High-Resolution 3D Gaussian Splatting: Qinzheng Zhou,

Zaychik Liu,

Lijing Lu,

Zhihang Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qinzheng and Liu, Zaychik and Lu, Lijing and Li, Zhihang},
  title     = {Confidence-Guided Multi-Scale Aggregation for Sparse-View High-Resolution {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19054--19064},
}
DMAligner: Enhancing Image Alignment via Diffusion Model Based View Synthesis: Xinglong Luo,

Ao Luo,

Zhengning Wang,

Yueqi Yang,

Chaoyu Feng,

Lei Lei,

Bing Zeng,

Shuaicheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Xinglong and Luo, Ao and Wang, Zhengning and Yang, Yueqi and Feng, Chaoyu and Lei, Lei and Zeng, Bing and Liu, Shuaicheng},
  title     = {{DMAligner}: Enhancing Image Alignment via Diffusion Model Based View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16541--16550},
}
Retrieving Counterfactuals Improves Visual In-Context Learning: Guangzhi Xiong,

Sanchit Sinha,

Zhenghao He,

Aidong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Guangzhi and Sinha, Sanchit and He, Zhenghao and Zhang, Aidong},
  title     = {Retrieving Counterfactuals Improves Visual In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24352--24362},
}
LeapAlign: Post-training Flow Matching Models at Any Generation Step by Building Two-Step Trajectories: Zhanhao Liang,

Tao Yang,

Jie Wu,

Chengjian Feng,

Liang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhanhao and Yang, Tao and Wu, Jie and Feng, Chengjian and Zheng, Liang},
  title     = {{LeapAlign}: Post-training Flow Matching Models at Any Generation Step by Building Two-Step Trajectories},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23238--23248},
}
EmbodiedSplat: Online Feed-Forward Semantic 3DGS for Open-Vocabulary 3D Scene Understanding: Seungjun Lee,

Zihan Wang,

Yunsong Wang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seungjun and Wang, Zihan and Wang, Yunsong and Lee, Gim Hee},
  title     = {{EmbodiedSplat}: Online Feed-Forward Semantic {3DGS} for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23774--23784},
}
From Infusion to Assimilation Distillation for Medical Image Segmentation: Jiankang Hong,

Ye Luo,

Yinan Liu,

Junsong Yuan; [pdf]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Jiankang and Luo, Ye and Liu, Yinan and Yuan, Junsong},
  title     = {From Infusion to Assimilation Distillation for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20985--20995},
}
UniGenDet: A Unified Generative-Discriminative Framework for Co-Evolutionary Image Generation and Generated Image Detection: Yanran Zhang,

Wenzhao Zheng,

Yifei Li,

Bingyao Yu,

Yu Zheng,

Lei Chen,

Jiwen Lu,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yanran and Zheng, Wenzhao and Li, Yifei and Yu, Bingyao and Zheng, Yu and Chen, Lei and Lu, Jiwen and Zhou, Jie},
  title     = {{UniGenDet}: A Unified Generative-Discriminative Framework for Co-Evolutionary Image Generation and Generated Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16226--16236},
}
SABER: Spatially Consistent 3D Universal Adversarial Objects for BEV Detectors: Aixuan Li,

Mochu Xiang,

Bosen Hou,

Zhexiong Wan,

Jing Zhang,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Aixuan and Xiang, Mochu and Hou, Bosen and Wan, Zhexiong and Zhang, Jing and Dai, Yuchao},
  title     = {{SABER}: Spatially Consistent {3D} Universal Adversarial Objects for {BEV} Detectors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25841--25850},
}
EMR-Diff: Edge-aware Multimodal Residual Diffusion Model for Hyperspectral Image Super-resolution: Tao Zhang,

Shengtao Yao,

Rong Zeng,

Zunjie Zhu,

Bolun Zheng,

Yaoqi Sun,

Ying Fu,

Chenggang Yan; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tao and Yao, Shengtao and Zeng, Rong and Zhu, Zunjie and Zheng, Bolun and Sun, Yaoqi and Fu, Ying and Yan, Chenggang},
  title     = {{EMR-Diff}: Edge-aware Multimodal Residual Diffusion Model for Hyperspectral Image Super-resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23419--23429},
}
I2I-Bench: A Comprehensive Benchmark Suite for Image-to-Image Editing Models: Juntong Wang,

Jiarui Wang,

Huiyu Duan,

Jiaxiang Kang,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Juntong and Wang, Jiarui and Duan, Huiyu and Kang, Jiaxiang and Zhai, Guangtao and Min, Xiongkuo},
  title     = {{I2I-Bench}: A Comprehensive Benchmark Suite for Image-to-Image Editing Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15354--15364},
}
Lumosaic: Hyperspectral Video via Active Illumination and Coded-Exposure Pixels: Dhruv Verma,

Andrew Qiu,

Roberto Rangel,

Ayandev Barman,

Hao Yang,

Chenjia Hu,

Fengqi Zhang,

Roman Genov,

David B. Lindell,

Kiriakos N. Kutulakos,

Alex Mariakakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Verma_2026_CVPR,
  author    = {Verma, Dhruv and Qiu, Andrew and Rangel, Roberto and Barman, Ayandev and Yang, Hao and Hu, Chenjia and Zhang, Fengqi and Genov, Roman and Lindell, David B. and Kutulakos, Kiriakos N. and Mariakakis, Alex},
  title     = {Lumosaic: Hyperspectral Video via Active Illumination and Coded-Exposure Pixels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26761--26771},
}
SAMosaic3D: Modular Scene Assembly for Real-Time 3D Segment Anything: Peng Wang,

Yongcai Wang,

Wang Chen,

Hualong Cao,

Kang Yang,

Chunxu Li,

Jie Wen,

Deying Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Peng and Wang, Yongcai and Chen, Wang and Cao, Hualong and Yang, Kang and Li, Chunxu and Wen, Jie and Li, Deying},
  title     = {{SAMosaic3D}: Modular Scene Assembly for Real-Time {3D} Segment Anything},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17894--17904},
}
Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly: Aditya Chetan,

Eric Cai,

Peeyush Kushwaha,

Bharath Raj Nagoor Kani,

Utkarsh Mall,

Qianqian Wang,

Noah Snavely,

Bharath Hariharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chetan_2026_CVPR,
  author    = {Chetan, Aditya and Cai, Eric and Kushwaha, Peeyush and Kani, Bharath Raj Nagoor and Mall, Utkarsh and Wang, Qianqian and Snavely, Noah and Hariharan, Bharath},
  title     = {{Flat-Pack Bench}: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16624--16634},
}
VDFE: Difference-Aware 3D Scene Editing with Non-Intrusive Video Diffusion Priors for Multi-View Consistency and Efficiency: Chao Zhang,

Fang Liu,

Shuo Li,

Yang Liu,

Jiahao Wang,

Xinyan Huang,

Lingling Li,

Puhua Chen,

Xu Liu,

Wenping Ma,

Siqi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chao and Liu, Fang and Li, Shuo and Liu, Yang and Wang, Jiahao and Huang, Xinyan and Li, Lingling and Chen, Puhua and Liu, Xu and Ma, Wenping and Yu, Siqi},
  title     = {{VDFE}: Difference-Aware {3D} Scene Editing with Non-Intrusive Video Diffusion Priors for Multi-View Consistency and Efficiency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18375--18385},
}
RigMo: Unifying Rig and Motion Learning for Generative Animation: Hao Zhang,

Jiahao Luo,

Bohui Wan,

Yizhou Zhao,

Zongrui Li,

Michael Vasilkovsky,

Chaoyang Wang,

Jian Wang,

Narendra Ahuja,

Bing Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hao and Luo, Jiahao and Wan, Bohui and Zhao, Yizhou and Li, Zongrui and Vasilkovsky, Michael and Wang, Chaoyang and Wang, Jian and Ahuja, Narendra and Zhou, Bing},
  title     = {{RigMo}: Unifying Rig and Motion Learning for Generative Animation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25438--25449}
}
TokenLight: Precise Lighting Control in Images using Attribute Tokens: Sumit Chaturvedi,

Yannick Hold-Geoffroy,

Mengwei Ren,

Jingyuan Liu,

He Zhang,

Yiqun Mei,

Julie Dorsey,

Zhixin Shu; [pdf] [supp]
[bibtex]
@InProceedings{Chaturvedi_2026_CVPR,
  author    = {Chaturvedi, Sumit and Hold-Geoffroy, Yannick and Ren, Mengwei and Liu, Jingyuan and Zhang, He and Mei, Yiqun and Dorsey, Julie and Shu, Zhixin},
  title     = {{TokenLight}: Precise Lighting Control in Images using Attribute Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19737--19748}
}
UltraFlux: Data-Model Co-Design for High-quality Native 4K Text-to-Image Generation across Diverse Aspect Ratios: Tian Ye,

Song Fei,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Tian and Fei, Song and Zhu, Lei},
  title     = {{UltraFlux}: Data-Model Co-Design for High-quality Native {4K} Text-to-Image Generation across Diverse Aspect Ratios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22070--22079}
}
InverFill: One-Step Inversion for Enhanced Few-Step Diffusion Inpainting: Duc Vu,

Kien Nguyen,

Trong-Tung Nguyen,

Ngan Nguyen,

Phong Nguyen,

Khoi Nguyen,

Cuong Pham,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vu_2026_CVPR,
  author    = {Vu, Duc and Nguyen, Kien and Nguyen, Trong-Tung and Nguyen, Ngan and Nguyen, Phong and Nguyen, Khoi and Pham, Cuong and Tran, Anh},
  title     = {{InverFill}: One-Step Inversion for Enhanced Few-Step Diffusion Inpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25677--25687}
}
HandWorld: Hand-Centric Unified Video Action Generation: Zhihao Sun,

Zhiying Du,

Xitong Yang,

Zuxuan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhihao and Du, Zhiying and Yang, Xitong and Wu, Zuxuan},
  title     = {{HandWorld}: Hand-Centric Unified Video Action Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15976--15985}
}
Training-Only Heterogeneous Image-Patch-Text Graph Supervision for Advancing Few-Shot Learning Adapters: Mohammed Rahman Sherif Khan Mohammad,

Ardhendu Behera,

Sandip Pradhan,

Swagat Kumar,

Amr Ahmed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mohammad_2026_CVPR,
  author    = {Mohammad, Mohammed Rahman Sherif Khan and Behera, Ardhendu and Pradhan, Sandip and Kumar, Swagat and Ahmed, Amr},
  title     = {Training-Only Heterogeneous Image-Patch-Text Graph Supervision for Advancing Few-Shot Learning Adapters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19613--19622}
}
Animator-Centric Skeleton Generation on Objects with Fine-Grained Details: Mingze Sun,

Cheng Zeng,

Jiansong Pei,

Junhao Chen,

Chaoyue Song,

Shaohui Wang,

Tianyuan Chang,

Bin Huang,

Zijiao Zeng,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Mingze and Zeng, Cheng and Pei, Jiansong and Chen, Junhao and Song, Chaoyue and Wang, Shaohui and Chang, Tianyuan and Huang, Bin and Zeng, Zijiao and Huang, Ruqi},
  title     = {Animator-Centric Skeleton Generation on Objects with Fine-Grained Details},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17336--17345}
}
ROSE: Rotate Your Large Language Model to See: Tongtian Yue,

Xuange Gao,

Longteng Guo,

Zijia Zhao,

Zikang Liu,

Jie Jiang,

Hua Huang,

Jing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Tongtian and Gao, Xuange and Guo, Longteng and Zhao, Zijia and Liu, Zikang and Jiang, Jie and Huang, Hua and Liu, Jing},
  title     = {{ROSE}: Rotate Your Large Language Model to See},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19265--19275}
}
Beyond Myopic Alignment: Lookahead Optimization for Online Class-Incremental Learning: Song Lai,

Zhe Zhao,

Fei Zhu,

Ji Cheng,

Xi Lin,

Qingfu Zhang,

Gaofeng Meng; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Song and Zhao, Zhe and Zhu, Fei and Cheng, Ji and Lin, Xi and Zhang, Qingfu and Meng, Gaofeng},
  title     = {Beyond Myopic Alignment: Lookahead Optimization for Online Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18053--18062}
}
Endless World: Real-Time 3D-Aware Long Video Generation: Ke Zhang,

Jiacong Xu,

Yiqun Mei,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ke and Xu, Jiacong and Mei, Yiqun and Patel, Vishal M.},
  title     = {{Endless World}: Real-Time {3D}-Aware Long Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18386--18395}
}
RAISE: Requirement-Adaptive Evolutionary Refinement for Training-Free Text-to-Image Alignment: Liyao Jiang,

Ruichen Chen,

Chao Gao,

Di Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Liyao and Chen, Ruichen and Gao, Chao and Niu, Di},
  title     = {{RAISE}: Requirement-Adaptive Evolutionary Refinement for Training-Free Text-to-Image Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22038--22048}
}
4DSurf: High-Fidelity Dynamic Scene Surface Reconstruction: Renjie Wu,

Hongdong Li,

Jose M. Alvarez,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Renjie and Li, Hongdong and Alvarez, Jose M. and Liu, Miaomiao},
  title     = {{4DSurf}: High-Fidelity Dynamic Scene Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22539--22549}
}
Pixel Motion Diffusion is What We Need for Robot Control: E-Ro Nguyen,

Yichi Zhang,

Kanchana Ranasinghe,

Xiang Li,

Michael S. Ryoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, E-Ro and Zhang, Yichi and Ranasinghe, Kanchana and Li, Xiang and Ryoo, Michael S.},
  title     = {Pixel Motion Diffusion is What We Need for Robot Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23663--23672}
}
RAW-Domain Degradation Models for Realistic Smartphone Super-Resolution: Ali Mosleh,

Faraz Ali,

Fengjia Zhang,

Stavros Tsogkas,

Junyong Lee,

Michael S. Brown,

Alex Levinshtein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mosleh_2026_CVPR,
  author    = {Mosleh, Ali and Ali, Faraz and Zhang, Fengjia and Tsogkas, Stavros and Lee, Junyong and Brown, Michael S. and Levinshtein, Alex},
  title     = {{RAW}-Domain Degradation Models for Realistic Smartphone Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23430--23439}
}
BeautyGRPO: Aesthetic Alignment for Face Retouching via Dynamic Path Guidance and Fine-Grained Preference Modeling: Jiachen Yang,

Xianhui Lin,

Yi Dong,

Zebiao Zheng,

Xing Liu,

Hong Gu,

Yanmei Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiachen and Lin, Xianhui and Dong, Yi and Zheng, Zebiao and Liu, Xing and Gu, Hong and Fang, Yanmei},
  title     = {{BeautyGRPO}: Aesthetic Alignment for Face Retouching via Dynamic Path Guidance and Fine-Grained Preference Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25110--25120}
}
DreamSAC: Learning Hamiltonian World Models via Symmetry Exploration: Jinzhou Tang,

Fan Feng,

Minghao Fu,

Wenjun Lin,

Jing Yang,

Biwei Huang,

Keze Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jinzhou and Feng, Fan and Fu, Minghao and Lin, Wenjun and Yang, Jing and Huang, Biwei and Wang, Keze},
  title     = {{DreamSAC}: Learning {Hamiltonian} World Models via Symmetry Exploration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15188--15198}
}
Resolving Evidence Sparsity: Agentic Context Engineering for Long-Document Understanding: Keliang Liu,

Zizhi Chen,

Mingcheng Li,

Jingqun Tang,

Dingkang Yang,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Keliang and Chen, Zizhi and Li, Mingcheng and Tang, Jingqun and Yang, Dingkang and Zhang, Lihua},
  title     = {Resolving Evidence Sparsity: Agentic Context Engineering for Long-Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19452--19462}
}
4DP-QA: Scalable QA for 4D Perception in Vision Language Models: Seokju Cho,

Abhishek Badki,

Hang Su,

Jindong Jiang,

Ziyao Zeng,

Seungryong Kim,

Sifei Liu,

Orazio Gallo; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Seokju and Badki, Abhishek and Su, Hang and Jiang, Jindong and Zeng, Ziyao and Kim, Seungryong and Liu, Sifei and Gallo, Orazio},
  title     = {{4DP-QA}: Scalable {QA} for {4D} Perception in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23869--23879}
}
Neighbor-Aware Localized Concept Erasure in Text-to-Image Diffusion Models: Zhuan Shi,

Alireza Dehghanpour Farashah,

Rik de Vries,

Golnoosh Farnadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhuan and Farashah, Alireza Dehghanpour and de Vries, Rik and Farnadi, Golnoosh},
  title     = {Neighbor-Aware Localized Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17440--17450}
}
DarkShake-DVS: Event-based Human Action Recognition under Low-light and Shaking Camera Conditions: Jiaqi Chen,

Qinfu Xu,

Liyuan Pan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiaqi and Xu, Qinfu and Pan, Liyuan},
  title     = {{DarkShake-DVS}: Event-based Human Action Recognition under Low-light and Shaking Camera Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20149--20159}
}
TriSim: Tri-Dimensional Similarity Modeling with Extreme Value Theory for False-Negative Mitigation in Remote Sensing Image-Text Retrieval: Chengyu Zheng,

Hanzhang Lu,

Jie Nie,

Shan Du; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Chengyu and Lu, Hanzhang and Nie, Jie and Du, Shan},
  title     = {{TriSim}: Tri-Dimensional Similarity Modeling with Extreme Value Theory for False-Negative Mitigation in Remote Sensing Image-Text Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23944--23954}
}
Self-Consistency for LLM-Based Motion Trajectory Generation and Verification: Jiaju Ma,

R. Kenny Jones,

Jiajun Wu,

Maneesh Agrawala; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Jiaju and Jones, R. Kenny and Wu, Jiajun and Agrawala, Maneesh},
  title     = {Self-Consistency for {LLM}-Based Motion Trajectory Generation and Verification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17357--17366}
}
UTPTrack: Towards Simple and Unified Token Pruning for Visual Tracking: Hao Wu,

Xudong Wang,

Jialiang Zhang,

Junlong Tong,

Xinghao Chen,

Junyan Lin,

Yunpu Ma,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Hao and Wang, Xudong and Zhang, Jialiang and Tong, Junlong and Chen, Xinghao and Lin, Junyan and Ma, Yunpu and Shen, Xiaoyu},
  title     = {{UTPTrack}: Towards Simple and Unified Token Pruning for Visual Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20963--20972}
}
DASH: A Meta-Attack Framework for Synthesizing Effective and Stealthy Adversarial Examples: Abdullah Al Nomaan Nafi,

Habibur Rahaman,

Zafaryab Haider,

Tanzim Mahfuz,

Fnu Suya,

Swarup Bhunia,

Prabuddha Chakraborty; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Al_Nomaan_Nafi_2026_CVPR,
  author    = {Al Nomaan Nafi, Abdullah and Rahaman, Habibur and Haider, Zafaryab and Mahfuz, Tanzim and Suya, Fnu and Bhunia, Swarup and Chakraborty, Prabuddha},
  title     = {{DASH}: A Meta-Attack Framework for Synthesizing Effective and Stealthy Adversarial Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27884--27893}
}
ReasonEdit: Towards Reasoning-Enhanced Image Editing Models: Fukun Yin,

Shiyu Liu,

Yucheng Han,

Zhibo Wang,

Peng Xing,

Rui Wang,

Wei Cheng,

Yingming Wang,

Aojie Li,

Zixin Yin,

Pengtao Chen,

Xianfang Zeng,

Gang Yu,

Daxin Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Fukun and Liu, Shiyu and Han, Yucheng and Wang, Zhibo and Xing, Peng and Wang, Rui and Cheng, Wei and Wang, Yingming and Li, Aojie and Yin, Zixin and Chen, Pengtao and Zeng, Xianfang and Yu, Gang and Jiang, Daxin},
  title     = {{ReasonEdit}: Towards Reasoning-Enhanced Image Editing Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23283--23293}
}
PromptEnhancer: Taming Your Rewriter for Text-to-Image Generation via Fine-Grained Reward: Linqing Wang,

Zhiyong Xu,

Ximing Xing,

Yiji Cheng,

Zhiyuan Zhao,

Donghao Li,

Tiankai Hang,

Zhenxi Li,

Jiale Tao,

Qixun Wang,

Ruihuang Li,

Comi Chen,

Xin Li,

Mingrui Wu,

Xinchi Deng,

Shuyang Gu,

Chunyu Wang,

Qinglin Lu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Linqing and Xu, Zhiyong and Xing, Ximing and Cheng, Yiji and Zhao, Zhiyuan and Li, Donghao and Hang, Tiankai and Li, Zhenxi and Tao, Jiale and Wang, Qixun and Li, Ruihuang and Chen, Comi and Li, Xin and Wu, Mingrui and Deng, Xinchi and Gu, Shuyang and Wang, Chunyu and Lu, Qinglin},
  title     = {{PromptEnhancer}: Taming Your Rewriter for Text-to-Image Generation via Fine-Grained Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14895--14904}
}
FAST: Topology-Aware Frequency-Domain Distribution Matching for Coreset Selection: Jin Cui,

Boran Zhao,

Jiajun Xu,

Jiaqi Guo,

Shuo Guan,

Pengju Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Jin and Zhao, Boran and Xu, Jiajun and Guo, Jiaqi and Guan, Shuo and Ren, Pengju},
  title     = {{FAST}: Topology-Aware Frequency-Domain Distribution Matching for Coreset Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24748--24758}
}
Clay-to-Stone: Phase-wise 3D Gaussian Splatting for Monocular Articulated Hand-Object Manipulation Modeling: Xingyu Liu,

Pengfei Ren,

Qi Qi,

Haifeng Sun,

Zirui Zhuang,

Jianxin Liao,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xingyu and Ren, Pengfei and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Liao, Jianxin and Wang, Jingyu},
  title     = {{Clay-to-Stone}: Phase-wise {3D} {Gaussian} Splatting for Monocular Articulated Hand-Object Manipulation Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23128--23138}
}
MoRE: 3D Visual Geometry Reconstruction Meets Mixture-of-Experts: Jingnan Gao,

Zhe Wang,

Xianze Fang,

Xingyu Ren,

Zhuo Chen,

Shengqi Liu,

Yuhao Cheng,

Jiangjing Lyu,

Xiaokang Yang,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jingnan and Wang, Zhe and Fang, Xianze and Ren, Xingyu and Chen, Zhuo and Liu, Shengqi and Cheng, Yuhao and Lyu, Jiangjing and Yang, Xiaokang and Yan, Yichao},
  title     = {{MoRE}: {3D} Visual Geometry Reconstruction Meets Mixture-of-Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14680--14691}
}
UniVerse: A Unified Modulation Framework for Segmentation-Free, Disentangled Multi-Concept Personalization: Quynh Phung,

Sandesh Ghimire,

Minsi Hu,

Chung-Chi Tsai,

Jia-Bin Huang; [pdf] [supp]
[bibtex]
@InProceedings{Phung_2026_CVPR,
  author    = {Phung, Quynh and Ghimire, Sandesh and Hu, Minsi and Tsai, Chung-Chi and Huang, Jia-Bin},
  title     = {{UniVerse}: A Unified Modulation Framework for Segmentation-Free, Disentangled Multi-Concept Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22007--22016}
}
GeoMotion: Rethinking Motion Segmentation via Latent 4D Geometry: Xiankang He,

Peile Lin,

Ying Cui,

Dongyan Guo,

Chunhua Shen,

Xiaoqin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xiankang and Lin, Peile and Cui, Ying and Guo, Dongyan and Shen, Chunhua and Zhang, Xiaoqin},
  title     = {{GeoMotion}: Rethinking Motion Segmentation via Latent {4D} Geometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28145--28155}
}
Generative Point Tracking and Forecasting: Xuanchen Lu,

Ang Cao,

Chao Feng,

Andrew Owens; [pdf]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xuanchen and Cao, Ang and Feng, Chao and Owens, Andrew},
  title     = {Generative Point Tracking and Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28167--28178}
}
RMAE-ProGRess: Advancing Semantic Segmentation in Unstructured Environments: Manish Bhurtel,

Danda B. Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Bhurtel_2026_CVPR,
  author    = {Bhurtel, Manish and Rawat, Danda B.},
  title     = {{RMAE-ProGRess}: Advancing Semantic Segmentation in Unstructured Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20511--20520}
}
DUET-VLM: Dual stage Unified Efficient Token reduction for VLM Training and Inference: Aditya Kumar Singh,

Hitesh Kandala,

Pratik Prabhanjan Brahma,

Zicheng Liu,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Aditya Kumar and Kandala, Hitesh and Brahma, Pratik Prabhanjan and Liu, Zicheng and Barsoum, Emad},
  title     = {{DUET-VLM}: Dual stage Unified Efficient Token reduction for {VLM} Training and Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17651--17660}
}
LAMP: Localization Aware Multi-camera People Tracking in Metric 3D World: Nan Yang,

Julian Straub,

Fan Zhang,

Richard Newcombe,

Jakob Engel,

Lingni Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Nan and Straub, Julian and Zhang, Fan and Newcombe, Richard and Engel, Jakob and Ma, Lingni},
  title     = {{LAMP}: Localization Aware Multi-camera People Tracking in Metric {3D} World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21208--21220}
}
FPS-Bench: A Benchmark for High Frame-Rate Video Understanding: Rohan Choudhury,

Jean-Sebastien Dandurand,

Kai Qiu,

Kshitij Madhav Bhat,

Kartik Sharma,

Liza Dahiya,

Yizhou Zhao,

Souraja Kundu,

Chun-Hsien Lin,

Kris M. Kitani,

László A. Jeni; [pdf] [supp]
[bibtex]
@InProceedings{Choudhury_2026_CVPR,
  author    = {Choudhury, Rohan and Dandurand, Jean-Sebastien and Qiu, Kai and Bhat, Kshitij Madhav and Sharma, Kartik and Dahiya, Liza and Zhao, Yizhou and Kundu, Souraja and Lin, Chun-Hsien and Kitani, Kris M. and Jeni, L{\'a}szl{\'o} A.},
  title     = {{FPS-Bench}: A Benchmark for High Frame-Rate Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18598--18608}
}
Points-to-3D: Structure-Aware 3D Generation with Point Cloud Priors: Jiatong Xia,

Zicheng Duan,

Anton van den Hengel,

Lingqiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Jiatong and Duan, Zicheng and van den Hengel, Anton and Liu, Lingqiao},
  title     = {{Points-to-3D}: Structure-Aware {3D} Generation with Point Cloud Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19928--19939}
}
V-RGBX: Video Editing with Accurate Controls over Intrinsic Properties: Ye Fang,

Tong Wu,

Valentin Deschaintre,

Duygu Ceylan,

Iliyan Georgiev,

Chun-Hao Paul Huang,

Yiwei Hu,

Xuelin Chen,

Tuanfeng Yang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Ye and Wu, Tong and Deschaintre, Valentin and Ceylan, Duygu and Georgiev, Iliyan and Huang, Chun-Hao Paul and Hu, Yiwei and Chen, Xuelin and Wang, Tuanfeng Yang},
  title     = {{V-RGBX}: Video Editing with Accurate Controls over Intrinsic Properties},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23182--23192}
}
ViLearn: Accelerating Training Convergence of Image-to-3D Generation via Visibility Learning: Rui Chen,

Jianfeng Zhang,

Jing Lin,

Xuanyu Yi,

Yixun Liang,

Guan Luo,

Xiu Li,

Zeming Li,

Ping Tan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Rui and Zhang, Jianfeng and Lin, Jing and Yi, Xuanyu and Liang, Yixun and Luo, Guan and Li, Xiu and Li, Zeming and Tan, Ping},
  title     = {{ViLearn}: Accelerating Training Convergence of Image-to-{3D} Generation via Visibility Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27041--27051}
}
Seeing Through the Shift: Causality-Inspired Robust Generalized Category Discovery: Wei Feng,

Yiwen Jiang,

Sijin Zhou,

Zhuang Qi,

Zhongxing Xu,

Zhonghua Wang,

Feilong Tang,

Zongyuan Ge; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Wei and Jiang, Yiwen and Zhou, Sijin and Qi, Zhuang and Xu, Zhongxing and Wang, Zhonghua and Tang, Feilong and Ge, Zongyuan},
  title     = {Seeing Through the Shift: Causality-Inspired Robust Generalized Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17766--17775}
}
Inference-time Physics Alignment of Video Generative Models with Latent World Models: Jianhao Yuan,

Xiaofeng Zhang,

Felix Friedrich,

Nicolas Beltran-Velez,

Melissa Hall,

Reyhane Askari-Hemmat,

Xiaochuang Han,

Nicolas Ballas,

Michal Drozdzal,

Adriana Romero-Soriano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Jianhao and Zhang, Xiaofeng and Friedrich, Felix and Beltran-Velez, Nicolas and Hall, Melissa and Askari-Hemmat, Reyhane and Han, Xiaochuang and Ballas, Nicolas and Drozdzal, Michal and Romero-Soriano, Adriana},
  title     = {Inference-time Physics Alignment of Video Generative Models with Latent World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16118--16129}
}
Unlocking Motion from Large Vision Models with a Semantic and Kinematic Duality for Gait Recognition: Zhanbo Huang,

Dingqiang Ye,

Xiaoming Liu,

Yu Kong; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhanbo and Ye, Dingqiang and Liu, Xiaoming and Kong, Yu},
  title     = {Unlocking Motion from Large Vision Models with a Semantic and Kinematic Duality for Gait Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28379--28390}
}
Probabilistic Precipitation Nowcasting with Rectified Flow Transformers: Johannes Schusterbauer,

Jannik Wiese,

Nick Stracke,

Timy Phan,

Björn Ommer; [pdf] [supp]
[bibtex]
@InProceedings{Schusterbauer_2026_CVPR,
  author    = {Schusterbauer, Johannes and Wiese, Jannik and Stracke, Nick and Phan, Timy and Ommer, Bj{\"o}rn},
  title     = {Probabilistic Precipitation Nowcasting with Rectified Flow Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25742--25756}
}
PR-MaGIC: Prompt Refinement Via Mask Decoder Gradient Flow For In-Context Segmentation: Minjae Lee,

Sungwoo Hur,

Soojin Hwang,

Won Hwa Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minjae and Hur, Sungwoo and Hwang, Soojin and Kim, Won Hwa},
  title     = {{PR-MaGIC}: Prompt Refinement Via Mask Decoder Gradient Flow For In-Context Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21659--21668}
}
PixDLM: A Dual-Path Multimodal Language Model for UAV Reasoning Segmentation: Shuyan Ke,

Yifan Mei,

Changli Wu,

Yonghan Zheng,

Jiayi Ji,

Liujuan Cao,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Shuyan and Mei, Yifan and Wu, Changli and Zheng, Yonghan and Ji, Jiayi and Cao, Liujuan and Ji, Rongrong},
  title     = {{PixDLM}: A Dual-Path Multimodal Language Model for {UAV} Reasoning Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26165--26175}
}
Vibe Spaces for Creatively Connecting and Expressing Visual Concepts: Huzheng Yang,

Katherine Xu,

Andrew Lu,

Michael D. Grossberg,

Yutong Bai,

Jianbo Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Huzheng and Xu, Katherine and Lu, Andrew and Grossberg, Michael D. and Bai, Yutong and Shi, Jianbo},
  title     = {Vibe Spaces for Creatively Connecting and Expressing Visual Concepts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21912--21921}
}
Dual-level Adaptation for Multi-Object Tracking: Building Test-Time Calibration from Experience and Intuition: Wen Guo,

Pengfei Zhao,

Zongmeng Wang,

Yufan Hu,

Junyu Gao; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Wen and Zhao, Pengfei and Wang, Zongmeng and Hu, Yufan and Gao, Junyu},
  title     = {Dual-level Adaptation for Multi-Object Tracking: Building Test-Time Calibration from Experience and Intuition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28190--28200}
}
Revisiting Token Compression for Accelerating ViT-based Sparse Multi-View 3D Object Detectors: Mingqian Ji,

Shanshan Zhang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian},
  title     = {Revisiting Token Compression for Accelerating {ViT}-based Sparse Multi-View {3D} Object Detectors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18776--18785}
}
CD-Buffer: Complementary Dual-Buffer Framework for Test-Time Adaptation in Adverse Weather Object Detection: Youngjun Song,

Hyeongyu Kim,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Youngjun and Kim, Hyeongyu and Hwang, Dosik},
  title     = {{CD-Buffer}: Complementary Dual-Buffer Framework for Test-Time Adaptation in Adverse Weather Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15050--15059}
}
Boosting Document Parsing Efficiency and Performance with Coarse-to-Fine Visual Processing: Cheng Cui,

Ting Sun,

Suyin Liang,

Tingquan Gao,

Zelun Zhang,

Jiaxuan Liu,

Xueqing Wang,

Changda Zhou,

Hongen Liu,

Manhui Lin,

Yue Zhang,

Yubo Zhang,

Jing Zhang,

Jun Zhang,

Xing Wei,

Yi Liu,

Dianhai Yu,

Yanjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Cheng and Sun, Ting and Liang, Suyin and Gao, Tingquan and Zhang, Zelun and Liu, Jiaxuan and Wang, Xueqing and Zhou, Changda and Liu, Hongen and Lin, Manhui and Zhang, Yue and Zhang, Yubo and Zhang, Jing and Zhang, Jun and Wei, Xing and Liu, Yi and Yu, Dianhai and Ma, Yanjun},
  title     = {Boosting Document Parsing Efficiency and Performance with Coarse-to-Fine Visual Processing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16655--16665}
}
FlashPortrait: 6x Faster Infinite Portrait Animation with Adaptive Latent Prediction: Shuyuan Tu,

Yueming Pan,

Yinming Huang,

Xintong Han,

Zhen Xing,

Qi Dai,

Kai Qiu,

Chong Luo,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Shuyuan and Pan, Yueming and Huang, Yinming and Han, Xintong and Xing, Zhen and Dai, Qi and Qiu, Kai and Luo, Chong and Wu, Zuxuan},
  title     = {{FlashPortrait}: 6x Faster Infinite Portrait Animation with Adaptive Latent Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25163--25173}
}
EffectMaker: Unifying Reasoning and Generation for Customized Visual Effect Creation: Shiyuan Yang,

Ruihuang Li,

Jiale Tao,

Shuai Shao,

Qinglin Lu,

Jing Liao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Shiyuan and Li, Ruihuang and Tao, Jiale and Shao, Shuai and Lu, Qinglin and Liao, Jing},
  title     = {{EffectMaker}: Unifying Reasoning and Generation for Customized Visual Effect Creation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16108--16117}
}
SOTA: Self-adaptive Optimal Transport for Zero-Shot Classification with Multiple Foundation Models: Zhanxuan Hu,

Qiyu Xu,

Yu Duan,

Yonghang Tai,

Huafeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zhanxuan and Xu, Qiyu and Duan, Yu and Tai, Yonghang and Li, Huafeng},
  title     = {{SOTA}: Self-adaptive Optimal Transport for Zero-Shot Classification with Multiple Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26624--26634},
}
Dual-Level Confidence based Implicit Self-Refinement for Medical Visual Question Answering: Meihong Pan,

Yefeng Zheng; [pdf]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Meihong and Zheng, Yefeng},
  title     = {Dual-Level Confidence based Implicit Self-Refinement for Medical Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17215--17225},
}
SkeletonContext: Skeleton-side Context Prompt Learning for Zero-Shot Skeleton-based Action Recognition: Ning Wang,

Tieyue Wu,

Naeha Sharif,

Farid Boussaid,

Guangming Zhu,

Lin Mei,

Mohammed Bennamoun,

Liang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ning and Wu, Tieyue and Sharif, Naeha and Boussaid, Farid and Zhu, Guangming and Mei, Lin and Bennamoun, Mohammed and Zhang, Liang},
  title     = {{SkeletonContext}: Skeleton-side Context Prompt Learning for Zero-Shot Skeleton-based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20170--20180},
}
GenHOI: Towards Object-Consistent Hand-Object Interaction with Temporally Balanced and Spatially Selective Object Injection: Xuan Huang,

Mochu Xiang,

Zhelun Shen,

Jinbo Wu,

Chenming Wu,

Chen Zhao,

Kaisiyuan Wang,

Hang Zhou,

Shanshan Liu,

Haocheng Feng,

Wei He,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xuan and Xiang, Mochu and Shen, Zhelun and Wu, Jinbo and Wu, Chenming and Zhao, Chen and Wang, Kaisiyuan and Zhou, Hang and Liu, Shanshan and Feng, Haocheng and He, Wei and Wang, Jingdong},
  title     = {{GenHOI}: Towards Object-Consistent Hand-Object Interaction with Temporally Balanced and Spatially Selective Object Injection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23117--23127},
}
From Scale to Speed: Adaptive Test-Time Scaling for Image Editing: Xiangyan Qu,

Zhenlong Yuan,

Jing Tang,

Rui Chen,

Datao Tang,

Meng Yu,

Lei Sun,

Yancheng Bai,

Xiangxiang Chu,

Gaopeng Gou,

Gang Xiong,

Yujun Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Xiangyan and Yuan, Zhenlong and Tang, Jing and Chen, Rui and Tang, Datao and Yu, Meng and Sun, Lei and Bai, Yancheng and Chu, Xiangxiang and Gou, Gaopeng and Xiong, Gang and Cai, Yujun},
  title     = {From Scale to Speed: Adaptive Test-Time Scaling for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23272--23282},
}
Learning to Track Instance from Single Nature Language Description: Yaozong Zheng,

Bineng Zhong,

Qihua Liang,

Shuimu Zeng,

Haiying Xia,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yaozong and Zhong, Bineng and Liang, Qihua and Zeng, Shuimu and Xia, Haiying and Song, Shuxiang},
  title     = {Learning to Track Instance from Single Nature Language Description},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20922--20931},
}
POINTS-Long: Adaptive Dual-Mode Visual Reasoning in MLLMs: Haicheng Wang,

Yuan Liu,

Yikun Liu,

Zhemeng Yu,

Zhongyin Zhao,

Yangxiu You,

Zilin Yu,

Le Tian,

Zhou Xiao,

Jie Zhou,

Weidi Xie,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Haicheng and Liu, Yuan and Liu, Yikun and Yu, Zhemeng and Zhao, Zhongyin and You, Yangxiu and Yu, Zilin and Tian, Le and Xiao, Zhou and Zhou, Jie and Xie, Weidi and Wang, Yanfeng},
  title     = {{POINTS-Long}: Adaptive Dual-Mode Visual Reasoning in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19119--19131},
}
EventDrive: Event Cameras for Vision-Language Driving Intelligence: Dongyue Lu,

Rong Li,

Ao Liang,

Lingdong Kong,

Wei Yin,

Lai Xing Ng,

Benoit R. Cottereau,

Camille Simon Chane,

Wei Tsang Ooi; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Dongyue and Li, Rong and Liang, Ao and Kong, Lingdong and Yin, Wei and Ng, Lai Xing and Cottereau, Benoit R. and Chane, Camille Simon and Ooi, Wei Tsang},
  title     = {{EventDrive}: Event Cameras for Vision-Language Driving Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22312--22322},
}
Evaluating Generative Models via One-Dimensional Code Distributions: Zexi Jia,

Pengcheng Luo,

Yijia Zhong,

Jinchao Zhang,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Zexi and Luo, Pengcheng and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie},
  title     = {Evaluating Generative Models via One-Dimensional Code Distributions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17077--17086},
}
ComPose: A Unified Completion-Pose Framework for Robust Category-Level Object Pose Estimation: Huan Ren,

Yihan Chen,

Chuxin Wang,

Nailong Liu,

Wenfei Yang,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Huan and Chen, Yihan and Wang, Chuxin and Liu, Nailong and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{ComPose}: A Unified Completion-Pose Framework for Robust Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14315--14324},
}
LaVR: Scene Latent Conditioned Generative Video Trajectory Re-Rendering using Large 4D Reconstruction Models: Mingyang Xie,

Numair Khan,

Tianfu Wang,

Naina Dhingra,

Seonghyeon Nam,

Haitao Yang,

Zhuo Hui,

Christopher Metzler,

Andrea Vedaldi,

Hamed Pirsiavash,

Lei Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Mingyang and Khan, Numair and Wang, Tianfu and Dhingra, Naina and Nam, Seonghyeon and Yang, Haitao and Hui, Zhuo and Metzler, Christopher and Vedaldi, Andrea and Pirsiavash, Hamed and Luo, Lei},
  title     = {{LaVR}: Scene Latent Conditioned Generative Video Trajectory Re-Rendering using Large {4D} Reconstruction Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25450--25460},
}
Robust3DGSW: Toward Robust Watermarking for Quantization-Aware 3D Gaussian Splatting: Boyu Wang,

Jun Xia,

Mingsong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Boyu and Xia, Jun and Chen, Mingsong},
  title     = {{Robust3DGSW}: Toward Robust Watermarking for Quantization-Aware {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19076--19084},
}
RehearseVLA: Simulated Post-Training for VLAs with Physically-Consistent World Model: Junjin Xiao,

Yandan Yang,

Xinyuan Chang,

Ronghan Chen,

Feng Xiong,

Mu Xu,

Wei-Shi Zheng,

Qing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Junjin and Yang, Yandan and Chang, Xinyuan and Chen, Ronghan and Xiong, Feng and Xu, Mu and Zheng, Wei-Shi and Zhang, Qing},
  title     = {{RehearseVLA}: Simulated Post-Training for {VLAs} with Physically-Consistent World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20867--20877},
}
Progressive Guessing to Fixed Point: Rethinking Human Motion Prediction with Deep Equilibrium Models: Dong Wei,

Huaijiang Sun,

Fan Liu,

Yuhui Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Dong and Sun, Huaijiang and Liu, Fan and Zheng, Yuhui},
  title     = {Progressive Guessing to Fixed Point: Rethinking Human Motion Prediction with Deep Equilibrium Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16454--16463},
}
NEAF: Natural Image Editing with Attention Fusion for Generalizable Test-time Optimization in Text-Guided Image Editing: Jisoo Kim,

Heeseok Oh; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jisoo and Oh, Heeseok},
  title     = {{NEAF}: Natural Image Editing with Attention Fusion for Generalizable Test-time Optimization in Text-Guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22509--22518},
}
VisiLock: Authorizing Instruction-based Image editing with Dual Score Distillation: Van Thanh Le,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Van Thanh and Fu, Yun},
  title     = {{VisiLock}: Authorizing Instruction-based Image editing with Dual Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15710--15718},
}
FlowPalm: Optical Flow Driven Non-Rigid Deformation for Geometrically Diverse Palmprint Generation: Yuchen Zou,

Huikai Shao,

Lihuang Fang,

Zhipeng Xiong,

Dexing Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Yuchen and Shao, Huikai and Fang, Lihuang and Xiong, Zhipeng and Zhong, Dexing},
  title     = {{FlowPalm}: Optical Flow Driven Non-Rigid Deformation for Geometrically Diverse Palmprint Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23591--23600},
}
When Safety Collides: Resolving Multi-Category Harmful Conflicts in Text-to-Image Diffusion via Adaptive Safety Guidance: Yongli Xiang,

Ziming Hong,

Zhaoqing Wang,

Xiangyu Zhao,

Bo Han,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Yongli and Hong, Ziming and Wang, Zhaoqing and Zhao, Xiangyu and Han, Bo and Liu, Tongliang},
  title     = {When Safety Collides: Resolving Multi-Category Harmful Conflicts in Text-to-Image Diffusion via Adaptive Safety Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14746--14755},
}
Can a Second-View Image Be a Language? Geometric and Semantic Cross-Modal Reasoning for X-ray Prohibited Item Detection: Chuang Peng,

Renshuai Tao,

Zhongwei Ren,

Xianglong Liu,

Yunchao Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Chuang and Tao, Renshuai and Ren, Zhongwei and Liu, Xianglong and Wei, Yunchao},
  title     = {Can a Second-View Image Be a Language? {Geometric} and Semantic Cross-Modal Reasoning for {X-ray} Prohibited Item Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26176--26186},
}
YOLO-Master: MOE-Accelerated with Specialized Transformers for Enhanced Real-time Detection: Xu Lin,

Jinlong Peng,

Zhenye Gan,

Jiawen Zhu,

Jun Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xu and Peng, Jinlong and Gan, Zhenye and Zhu, Jiawen and Liu, Jun},
  title     = {{YOLO-Master}: {MOE}-Accelerated with Specialized Transformers for Enhanced Real-time Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18440--18449},
}
PointAlign: Feature-Level Alignment Regularization for 3D Vision-Language Models: Yuanhao Su,

Shaofeng Zhang,

Xiaosong Jia,

Qi Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Yuanhao and Zhang, Shaofeng and Jia, Xiaosong and Fan, Qi},
  title     = {{PointAlign}: Feature-Level Alignment Regularization for {3D} Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22100--22110},
}
MOON2.0: Dynamic Modality-balanced Multimodal Representation Learning for E-commerce Product Understanding: Zhanheng Nie,

Chenghan Fu,

Daoze Zhang,

Junxian Wu,

Wanxian Guan,

Pengjie Wang,

Jian Xu,

Bo Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Zhanheng and Fu, Chenghan and Zhang, Daoze and Wu, Junxian and Guan, Wanxian and Wang, Pengjie and Xu, Jian and Zheng, Bo},
  title     = {{MOON2.0}: Dynamic Modality-balanced Multimodal Representation Learning for E-commerce Product Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22975--22985},
}
ApET: Approximation-Error Guided Token Compression for Efficient VLMs: Qiankun Ma,

Ziyao Zhang,

Haofei Wang,

Zhen Song,

Jie Chen,

Hairong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Qiankun and Zhang, Ziyao and Wang, Haofei and Song, Zhen and Chen, Jie and Zheng, Hairong},
  title     = {{ApET}: Approximation-Error Guided Token Compression for Efficient {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26306--26316},
}
Polyphony: Diffusion-based Dual-Hand Action Segmentation with Alternating Vision Transformer and Semantic Conditioning: Hao Zheng,

Hu Wang,

Tiantian Zheng,

Prajjwal Bhattarai,

Tuka Alhanai; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hao and Wang, Hu and Zheng, Tiantian and Bhattarai, Prajjwal and Alhanai, Tuka},
  title     = {Polyphony: Diffusion-based Dual-Hand Action Segmentation with Alternating Vision Transformer and Semantic Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20098--20107},
}
Learning to See through Illumination Extremes with Event Streaming in Multimodal Large Language Models: Baoheng Zhang,

Jiahui Liu,

Gui Zhao,

Weizhou Zhang,

Yixuan Ma,

Jun Jiang,

Yingxian Chen,

Wilton W.T. Fok,

Xiaojuan Qi,

Hayden Kwok-Hay So; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Baoheng and Liu, Jiahui and Zhao, Gui and Zhang, Weizhou and Ma, Yixuan and Jiang, Jun and Chen, Yingxian and Fok, Wilton W. T. and Qi, Xiaojuan and So, Hayden Kwok-Hay},
  title     = {Learning to See through Illumination Extremes with Event Streaming in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26198--26208},
}
ORSATR-X: A Foundation Model based on Differential-and-Excitation Networks for Optical Remote Sensing Object Recognition: Canyu Mo,

Yongxiang Liu,

Jiehua Zhang,

Zilong Yu,

Zhen Liu,

Tianpeng Liu,

Li Liu; [pdf] [supp]
[bibtex]
@InProceedings{Mo_2026_CVPR,
  author    = {Mo, Canyu and Liu, Yongxiang and Zhang, Jiehua and Yu, Zilong and Liu, Zhen and Liu, Tianpeng and Liu, Li},
  title     = {{ORSATR-X}: A Foundation Model based on Differential-and-Excitation Networks for Optical Remote Sensing Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27849--27860},
}
TacSIm: A Dataset and Benchmark for Football Tactical Style Imitation: Peng Wen,

Yuting Wang,

Qiurui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Peng and Wang, Yuting and Wang, Qiurui},
  title     = {{TacSIm}: A Dataset and Benchmark for Football Tactical Style Imitation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20014--20023},
}
See Further, Think Deeper: Advancing VLM's Reasoning Ability with Low-level Visual Cues and Reflection: Zhiheng Wu,

Tong Wang,

Shuning Wang,

Naiming Liu,

Yumeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhiheng and Wang, Tong and Wang, Shuning and Liu, Naiming and Zhang, Yumeng},
  title     = {See Further, Think Deeper: Advancing {VLM's} Reasoning Ability with Low-level Visual Cues and Reflection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18870--18880},
}
OccAny: Generalized Unconstrained Urban 3D Occupancy: Anh-Quan Cao,

Tuan-Hung Vu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Anh-Quan and Vu, Tuan-Hung},
  title     = {{OccAny}: Generalized Unconstrained Urban {3D} Occupancy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28599--28609},
}
MOGeo: Beyond One-to-One Cross-View Object Geo-localization: Bo Lv,

Qingwang Zhang,

Le Wu,

Yuanyuan Li,

Yingying Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Bo and Zhang, Qingwang and Wu, Le and Li, Yuanyuan and Zhu, Yingying},
  title     = {{MOGeo}: Beyond One-to-One Cross-View Object Geo-localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26422--26431},
}
Beyond 3D VQAs: Injecting 3D Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning: Chun-Hsiao Yeh,

Shengyi Qian,

Manchen Wang,

Yi Ma,

Joseph Tighe,

Fanyi Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yeh_2026_CVPR,
  author    = {Yeh, Chun-Hsiao and Qian, Shengyi and Wang, Manchen and Ma, Yi and Tighe, Joseph and Xiao, Fanyi},
  title     = {Beyond {3D} {VQAs}: Injecting {3D} Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16723--16733},
}
Pressure2Motion: Hierarchical Human Motion Reconstruction from Ground Pressure with Text Guidance: Zhengxuan Li,

Qinhui Yang,

Yiyu Zhuang,

Chuan Guo,

Xinxin Zuo,

Xiaoxiao Long,

Yao Yao,

Xun Cao,

Qiu Shen,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhengxuan and Yang, Qinhui and Zhuang, Yiyu and Guo, Chuan and Zuo, Xinxin and Long, Xiaoxiao and Yao, Yao and Cao, Xun and Shen, Qiu and Zhu, Hao},
  title     = {{Pressure2Motion}: Hierarchical Human Motion Reconstruction from Ground Pressure with Text Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23495--23505},
}
Exemplar-Free Continual Learning for State Space Models: Isaac Ning Lee,

Leila Mahmoodi,

Trung Le,

Mehrtash Harandi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Isaac Ning and Mahmoodi, Leila and Le, Trung and Harandi, Mehrtash},
  title     = {Exemplar-Free Continual Learning for State Space Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25077--25087},
}
AdaDexTrack: Dynamic Modulation for Adaptive and Generalizable Dexterous Manipulation Tracking: Jianibieke Adalibieke,

Qianwei Han,

Xueyi Liu,

Yuzhe Qin,

Li Yi; [pdf] [supp]
[bibtex]
@InProceedings{Adalibieke_2026_CVPR,
  author    = {Adalibieke, Jianibieke and Han, Qianwei and Liu, Xueyi and Qin, Yuzhe and Yi, Li},
  title     = {{AdaDexTrack}: Dynamic Modulation for Adaptive and Generalizable Dexterous Manipulation Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28021--28031},
}
LightRR: A Lightweight Network for Single Image Reflection Removal: Wenbin Yin,

Junkang Zhang,

Sunzhe Yang,

Faming Fang,

Guixu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Wenbin and Zhang, Junkang and Yang, Sunzhe and Fang, Faming and Zhang, Guixu},
  title     = {{LightRR}: A Lightweight Network for Single Image Reflection Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19706--19715},
}
SceneMaker: Open-set 3D Scene Generation with Decoupled De-occlusion and Pose Estimation Model: Yukai Shi,

Weiyu Li,

Zihao Wang,

Hongyang Li,

Xingyu Chen,

Ping Tan,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yukai and Li, Weiyu and Wang, Zihao and Li, Hongyang and Chen, Xingyu and Tan, Ping and Zhang, Lei},
  title     = {{SceneMaker}: Open-set {3D} Scene Generation with Decoupled De-occlusion and Pose Estimation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27146--27156},
}
Block-Sparse Global Attention for Efficient Multi-View Geometry Transformers: Chung-Shien Brian Wang,

Christian Schmidt,

Jens Piekenbrinck,

Bastian Leibe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chung-Shien Brian and Schmidt, Christian and Piekenbrinck, Jens and Leibe, Bastian},
  title     = {Block-Sparse Global Attention for Efficient Multi-View Geometry Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14546--14555},
}
AMap: Distilling Future Priors for Ahead-Aware Online HD Map Construction: Ruikai Li,

Xinrun Li,

Mengwei Xie,

Hao Shan,

Shoumeng Qiu,

Xinyuan Chang,

Yizhe Fan,

Feng Xiong,

Han Jiang,

Yilong Ren,

Haiyang Yu,

Mu Xu,

Yang Long,

Varun Ojha,

Zhiyong Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ruikai and Li, Xinrun and Xie, Mengwei and Shan, Hao and Qiu, Shoumeng and Chang, Xinyuan and Fan, Yizhe and Xiong, Feng and Jiang, Han and Ren, Yilong and Yu, Haiyang and Xu, Mu and Long, Yang and Ojha, Varun and Cui, Zhiyong},
  title     = {{AMap}: Distilling Future Priors for Ahead-Aware Online {HD} Map Construction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24906--24917},
}
SciEducator: Scientific Video Understanding and Educating via Deming-Cycle Multi-Agent System: Zhiyu Xu,

Weilong Yan,

Yufei Shi,

Xin Meng,

Tao He,

Huiping Zhuang,

Ming Li,

Hehe Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhiyu and Yan, Weilong and Shi, Yufei and Meng, Xin and He, Tao and Zhuang, Huiping and Li, Ming and Fan, Hehe},
  title     = {{SciEducator}: Scientific Video Understanding and Educating via {Deming}-Cycle Multi-Agent System},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26497--26507},
}
GHPT: Real-Time Relightable Gaussian Splatting using Hybrid Path Tracing: Jinyang Bo,

Fan Dou,

Wenrui Quan,

Shangxun Liu,

Yang Xu,

Yuhe Zhang,

Kang Li,

Guohua Geng; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Jinyang and Dou, Fan and Quan, Wenrui and Liu, Shangxun and Xu, Yang and Zhang, Yuhe and Li, Kang and Geng, Guohua},
  title     = {{GHPT}: Real-Time Relightable {Gaussian} Splatting using Hybrid Path Tracing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25990--25999},
}
WorldGen: From Text to Traversable and Interactive 3D Worlds: Dilin Wang,

Hyunyoung Jung,

Tom Monnier,

Kihyuk Sohn,

Chuhang Zou,

Xiaoyu Xiang,

Yu-Ying Yeh,

Di Liu,

Zixuan Huang,

Thu Nguyen-Phuoc,

Yuchen Fan,

Sergiu Oprea,

Ziyan Wang,

Roman Shapovalov,

Nikolaos Sarafianos,

Thibault Groueix,

Antoine Toisoul,

Prithviraj Dhar,

Xiao Chu,

Minghao Chen,

Geon Yeong Park,

Rakesh Ranjan,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Dilin and Jung, Hyunyoung and Monnier, Tom and Sohn, Kihyuk and Zou, Chuhang and Xiang, Xiaoyu and Yeh, Yu-Ying and Liu, Di and Huang, Zixuan and Nguyen-Phuoc, Thu and Fan, Yuchen and Oprea, Sergiu and Wang, Ziyan and Shapovalov, Roman and Sarafianos, Nikolaos and Groueix, Thibault and Toisoul, Antoine and Dhar, Prithviraj and Chu, Xiao and Chen, Minghao and Park, Geon Yeong and Ranjan, Rakesh and Vedaldi, Andrea},
  title     = {{WorldGen}: From Text to Traversable and Interactive {3D} Worlds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27124--27135},
}
SJD-PAC: Accelerating Speculative Jacobi Decoding via Proactive Drafting and Adaptive Continuation: Jialiang Kang,

Han Shu,

Wenshuo Li,

Yingjie Zhai,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jialiang and Shu, Han and Li, Wenshuo and Zhai, Yingjie and Chen, Xinghao},
  title     = {{SJD-PAC}: Accelerating Speculative {Jacobi} Decoding via Proactive Drafting and Adaptive Continuation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16666--16675},
}
NitroGen: An Open Foundation Model for Generalist Gaming Agents: Loïc Magne,

Anas Awadalla,

Guanzhi Wang,

Yinzhen Xu,

Joshua Belofsky,

Fengyuan Hu,

Joohwan Kim,

Ludwig Schmidt,

Georgia Gkioxari,

Jan Kautz,

Yisong Yue,

Yejin Choi,

Yuke Zhu,

Linxi Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magne_2026_CVPR,
  author    = {Magne, Lo{\"\i}c and Awadalla, Anas and Wang, Guanzhi and Xu, Yinzhen and Belofsky, Joshua and Hu, Fengyuan and Kim, Joohwan and Schmidt, Ludwig and Gkioxari, Georgia and Kautz, Jan and Yue, Yisong and Choi, Yejin and Zhu, Yuke and Fan, Linxi},
  title     = {{NitroGen}: An Open Foundation Model for Generalist Gaming Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21511--21521},
}
VEMamba: Efficient Isotropic Reconstruction of Volume Electron Microscopy with Axial-Lateral Consistent Mamba: Longmi Gao,

Pan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Longmi and Gao, Pan},
  title     = {{VEMamba}: Efficient Isotropic Reconstruction of Volume Electron Microscopy with Axial-Lateral Consistent {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15585--15594},
}
HumanNOVA: Photorealistic, Universal and Rapid 3D Human Avatar Modeling from a Single Image: Hezhen Hu,

Wangbo Zhao,

Lanqing Guo,

Hanwen Jiang,

Jonathan C. Liu,

Zhiwen Fan,

Kai Wang,

Zhangyang Wang,

Georgios Pavlakos; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Hezhen and Zhao, Wangbo and Guo, Lanqing and Jiang, Hanwen and Liu, Jonathan C. and Fan, Zhiwen and Wang, Kai and Wang, Zhangyang and Pavlakos, Georgios},
  title     = {{HumanNOVA}: Photorealistic, Universal and Rapid {3D} Human Avatar Modeling from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18096--18106},
}
ULF-Loc: Unbiased Landmark Feature for Robust Visual Localization with 3D Gaussian Splatting: Yingdong Gu,

Shaocheng Yan,

Zhenjun Zhao,

Yuan Kou,

Jianxin Luo,

Pengcheng Shi,

Jiayuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Yingdong and Yan, Shaocheng and Zhao, Zhenjun and Kou, Yuan and Luo, Jianxin and Shi, Pengcheng and Li, Jiayuan},
  title     = {{ULF-Loc}: Unbiased Landmark Feature for Robust Visual Localization with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19065--19075},
}
Reasoning Palette: Modulating Reasoning via Latent Contextualization for Controllable Exploration for (V)LMs: Rujiao Long,

Yang Li,

Xingyao Zhang,

Weixun Wang,

Tianqianjin Lin,

Xi Zhao,

Yuchi Xu,

Wenbo Su,

Junchi Yan,

Bo Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Rujiao and Li, Yang and Zhang, Xingyao and Wang, Weixun and Lin, Tianqianjin and Zhao, Xi and Xu, Yuchi and Su, Wenbo and Yan, Junchi and Zheng, Bo},
  title     = {Reasoning Palette: Modulating Reasoning via Latent Contextualization for Controllable Exploration for {(V)LMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19463--19474},
}
Registration-Free Learnable Multi-View Capture of Faces in Dense Semantic Correspondence: Panagiotis P. Filntisis,

George Retsinas,

Radek Danecek,

Vanessa Sklyarova,

Petros Maragos,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Filntisis_2026_CVPR,
  author    = {Filntisis, Panagiotis P. and Retsinas, George and Danecek, Radek and Sklyarova, Vanessa and Maragos, Petros and Bolkart, Timo},
  title     = {Registration-Free Learnable Multi-View Capture of Faces in Dense Semantic Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14512--14523},
}
Convolutional Neural Networks Driven by Content Similarity: Ligeng Zou,

Guihu Zhao; [pdf]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Ligeng and Zhao, Guihu},
  title     = {Convolutional Neural Networks Driven by Content Similarity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27449--27459},
}
PEARL: Geometry Aligns Semantics for Training-Free Open-Vocabulary Semantic Segmentation: Gensheng Pei,

Xiruo Jiang,

Xinhao Cai,

Tao Chen,

Yazhou Yao,

Byeungwoo Jeon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Gensheng and Jiang, Xiruo and Cai, Xinhao and Chen, Tao and Yao, Yazhou and Jeon, Byeungwoo},
  title     = {{PEARL}: Geometry Aligns Semantics for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17927--17937},
}
Protect to Adapt: Orthogonal Subspace Control with Ranked Negative-Prompt Curriculum for Few-Shot Action Recognition: Hantao Qi,

Yan Yan,

Junlong Gao,

Hanzi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Hantao and Yan, Yan and Gao, Junlong and Wang, Hanzi},
  title     = {Protect to Adapt: Orthogonal Subspace Control with Ranked Negative-Prompt Curriculum for Few-Shot Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20160--20169},
}
Differentially Private 2D Human Pose Estimation: Kaushik Bhargav Sivangi,

Paul Henderson,

Fani Deligianni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sivangi_2026_CVPR,
  author    = {Sivangi, Kaushik Bhargav and Henderson, Paul and Deligianni, Fani},
  title     = {Differentially Private {2D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21143--21153},
}
MetricHMSR: Metric Human Mesh and Scene Recovery from Monocular Images: Chentao Song,

He Zhang,

Haolei Yuan,

Haozhe Lin,

Jianhua Tao,

Hongwen Zhang,

Tao Yu; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Chentao and Zhang, He and Yuan, Haolei and Lin, Haozhe and Tao, Jianhua and Zhang, Hongwen and Yu, Tao},
    title     = {{MetricHMSR}: Metric Human Mesh and Scene Recovery from Monocular Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21132--21142}
}
ChartNet: A Million-Scale, High-Quality Multimodal Dataset for Robust Chart Understanding: Jovana Kondic,

Pengyuan Li,

Dhiraj Joshi,

Isaac Sanchez,

Ben Wiesel,

Shafiq Abedin,

Amit Alfassy,

Eli Schwartz,

Daniel Caraballo,

Yagmur Gizem Cinar,

Florian Scheidegger,

Steven I. Ross,

Daniel Karl I. Weidele,

Hang Hua,

Ekaterina Arutyunova,

Roei Herzig,

Zihan Wang,

Xinyue Yu,

Yunfei Zhao,

Sicong Jiang,

Minghao Liu,

Qunshu Lin,

Aude Oliva,

Rogerio Feris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kondic_2026_CVPR,
    author    = {Kondic, Jovana and Li, Pengyuan and Joshi, Dhiraj and Sanchez, Isaac and Wiesel, Ben and Abedin, Shafiq and Alfassy, Amit and Schwartz, Eli and Caraballo, Daniel and Cinar, Yagmur Gizem and Scheidegger, Florian and Ross, Steven I. and Weidele, Daniel Karl I. and Hua, Hang and Arutyunova, Ekaterina and Herzig, Roei and Wang, Zihan and Yu, Xinyue and Zhao, Yunfei and Jiang, Sicong and Liu, Minghao and Lin, Qunshu and Oliva, Aude and Feris, Rogerio},
    title     = {{ChartNet}: A Million-Scale, High-Quality Multimodal Dataset for Robust Chart Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15922--15932}
}
GraspGen-X: Cross-Embodiment 6-DOF Diffusion-based Grasping: Beining Han,

Yu-Wei Chao,

Erwin Coumans,

Clemens Eppner,

Jia Deng,

Stan Birchfield,

Adithyavairavan Murali; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Beining and Chao, Yu-Wei and Coumans, Erwin and Eppner, Clemens and Deng, Jia and Birchfield, Stan and Murali, Adithyavairavan},
    title     = {{GraspGen-X}: Cross-Embodiment {6-DOF} Diffusion-based Grasping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20878--20889}
}
H2-Surv: Hierarchical Hyperbolic Multimodal Representation Learning for Survival Prediction: Jiaqi Yang,

Wenting Chen,

Xiangjian He,

Yuanbai Li,

Sen Yang,

Linlin Shen,

Xiaohan Xing; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Jiaqi and Chen, Wenting and He, Xiangjian and Li, Yuanbai and Yang, Sen and Shen, Linlin and Xing, Xiaohan},
    title     = {{H2-Surv}: Hierarchical Hyperbolic Multimodal Representation Learning for Survival Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28318--28327}
}
BiProLoRA: Bilevel Prompt LoRA for Real Scene Recovery: Nan An,

Long Ma,

Tengyu Ma,

Zhu Liu,

Yingchi Liu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{An_2026_CVPR,
    author    = {An, Nan and Ma, Long and Ma, Tengyu and Liu, Zhu and Liu, Yingchi and Liu, Risheng},
    title     = {{BiProLoRA}: Bilevel Prompt {LoRA} for Real Scene Recovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15465--15475}
}
OneStory: Coherent Multi-Shot Video Generation with Adaptive Memory: Zhaochong An,

Menglin Jia,

Haonan Qiu,

Zijian Zhou,

Xiaoke Huang,

Zhiheng Liu,

Weiming Ren,

Kumara Kahatapitiya,

Ding Liu,

Sen He,

Chenyang Zhang,

Tao Xiang,

Fanny Yang,

Serge Belongie,

Tian Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
    author    = {An, Zhaochong and Jia, Menglin and Qiu, Haonan and Zhou, Zijian and Huang, Xiaoke and Liu, Zhiheng and Ren, Weiming and Kahatapitiya, Kumara and Liu, Ding and He, Sen and Zhang, Chenyang and Xiang, Tao and Yang, Fanny and Belongie, Serge and Xie, Tian},
    title     = {{OneStory}: Coherent Multi-Shot Video Generation with Adaptive Memory},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16173--16184}
}
OMoBlur: An Object Motion Blur Dataset and Benchmark for Real-World Local Motion Deblurring: Dingchuan Yu,

Jiatong Li,

Jingwen Zhou,

Zhengyue Zhuge,

Yueting Chen,

Qi Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Dingchuan and Li, Jiatong and Zhou, Jingwen and Zhuge, Zhengyue and Chen, Yueting and Li, Qi},
    title     = {{OMoBlur}: An Object Motion Blur Dataset and Benchmark for Real-World Local Motion Deblurring},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22626--22635}
}
Locate-then-Sparsify: Attribution Guided Sparse Strategy for Visual Hallucination Mitigation: Tiantian Dang,

Chao Bi,

Shufan Shen,

Jinzhe Liu,

Qingming Huang,

Shuhui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2026_CVPR,
    author    = {Dang, Tiantian and Bi, Chao and Shen, Shufan and Liu, Jinzhe and Huang, Qingming and Wang, Shuhui},
    title     = {Locate-then-Sparsify: Attribution Guided Sparse Strategy for Visual Hallucination Mitigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18251--18260}
}
Cycle-Consistent Tuning for Layered Image Decomposition: Zheng Gu,

Min Lu,

Zhida Sun,

Dani Lischinski,

Daniel Cohen-Or,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
    author    = {Gu, Zheng and Lu, Min and Sun, Zhida and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
    title     = {Cycle-Consistent Tuning for Layered Image Decomposition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22478--22487}
}
Structural Graph Probing of Vision-Language Models: Haoyu He,

Yue Zhuo,

Yu Zheng,

Qi R. Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Haoyu and Zhuo, Yue and Zheng, Yu and Wang, Qi R.},
    title     = {Structural Graph Probing of Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24084--24094}
}
SpatialDiff: 3D-Aware Object Movement via Implicit Spatial Modeling: Zheng Liu,

Zijian He,

Huiguo He,

Weizhi Zhong,

Yejun Tang,

Huan Yang,

Kun Gai,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Zheng and He, Zijian and He, Huiguo and Zhong, Weizhi and Tang, Yejun and Yang, Huan and Gai, Kun and Li, Guanbin},
    title     = {{SpatialDiff}: {3D}-Aware Object Movement via Implicit Spatial Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18396--18406}
}
Voxify3D: Pixel Art Meets Volumetric Rendering: Yi-Chuan Huang,

Jiewen Chan,

Hao-Jen Chien,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Yi-Chuan and Chan, Jiewen and Chien, Hao-Jen and Liu, Yu-Lun},
    title     = {{Voxify3D}: Pixel Art Meets Volumetric Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15398--15410}
}
Beyond Missing Modalities: Hypergraph Conditioned Diffusion for Uncertainty-Aware Multimodal Emotion Recognition: Xihang Qiu,

Yuhao Fang,

Qing Zhou,

Bin Zhai,

Jialong Hong,

Wanpeng Zhang,

Yao Lu,

Ye Zhang,

Chun Li; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
    author    = {Qiu, Xihang and Fang, Yuhao and Zhou, Qing and Zhai, Bin and Hong, Jialong and Zhang, Wanpeng and Lu, Yao and Zhang, Ye and Li, Chun},
    title     = {Beyond Missing Modalities: Hypergraph Conditioned Diffusion for Uncertainty-Aware Multimodal Emotion Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22953--22963}
}
Cross-View Distillation and Adaptive Masking for Incomplete Multi-View Multi-Label Classification: Yadong Liu,

Qiaoqi Li,

Yueying Wang,

Lunke Fei,

Jie Wen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yadong and Li, Qiaoqi and Wang, Yueying and Fei, Lunke and Wen, Jie},
    title     = {Cross-View Distillation and Adaptive Masking for Incomplete Multi-View Multi-Label Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23051--23060}
}
Prefill-Time Intervention for Mitigating Hallucination in Large Vision-Language Models: Chengsheng Zhang,

Chenghao Sun,

Xinyan Jiang,

Wei Li,

Xinmei Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Chengsheng and Sun, Chenghao and Jiang, Xinyan and Li, Wei and Tian, Xinmei},
    title     = {Prefill-Time Intervention for Mitigating Hallucination in Large Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25293--25303}
}
R2G: A Multi-View Circuit Graph Benchmark Suite from RTL to GDSII: Zewei Zhou,

Jiajun Zou,

Jiajia Zhang,

Ao Yang,

Ruichao He,

Haozheng Zhou,

Ao Liu,

Jiawei Liu,

Leilei Jin,

Shan Shen,

Daying Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Zewei and Zou, Jiajun and Zhang, Jiajia and Yang, Ao and He, Ruichao and Zhou, Haozheng and Liu, Ao and Liu, Jiawei and Jin, Leilei and Shen, Shan and Sun, Daying},
    title     = {{R2G}: A Multi-View Circuit Graph Benchmark Suite from {RTL} to {GDSII}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18827--18836}
}
JUMP-Hand: Learning Joint-wise Uncertainty to Gate Mixture of View Experts for Multi-View 3D Hand Reconstruction: Haohong Kuang,

Yang Xiao,

Changlong Jiang,

Jinghong Zheng,

Hang Xu,

Ran Wang,

Zhiguo Cao,

Joey Tianyi Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
    author    = {Kuang, Haohong and Xiao, Yang and Jiang, Changlong and Zheng, Jinghong and Xu, Hang and Wang, Ran and Cao, Zhiguo and Zhou, Joey Tianyi},
    title     = {{JUMP-Hand}: Learning Joint-wise Uncertainty to Gate Mixture of View Experts for Multi-View {3D} Hand Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28348--28357}
}
Anatomical Domain Shifts: Test-time Heterogeneous Adaptation for 3D Human Pose Prediction: Qiongjie Cui,

Pan Zhou,

Jingjing Chen,

Na Zhao; [pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
    author    = {Cui, Qiongjie and Zhou, Pan and Chen, Jingjing and Zhao, Na},
    title     = {Anatomical Domain Shifts: Test-time Heterogeneous Adaptation for {3D} Human Pose Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28369--28378}
}
SubspaceAD: Training-Free Few-Shot Anomaly Detection via Subspace Modeling: Camile Lendering,

Erkut Akdag,

Egor Bondarau; [pdf] [supp]
[bibtex]
@InProceedings{Lendering_2026_CVPR,
    author    = {Lendering, Camile and Akdag, Erkut and Bondarau, Egor},
    title     = {{SubspaceAD}: Training-Free Few-Shot Anomaly Detection via Subspace Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28557--28566}
}
FlashMesh: Faster and Better Autoregressive Mesh Synthesis via Structured Speculation: Tingrui Shen,

Yiheng Zhang,

Chen Tang,

Chuan Ping,

Zixing Zhao,

Le Wan,

Yuwang Wang,

Ronggang Wang,

Shengfeng He; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
    author    = {Shen, Tingrui and Zhang, Yiheng and Tang, Chen and Ping, Chuan and Zhao, Zixing and Wan, Le and Wang, Yuwang and Wang, Ronggang and He, Shengfeng},
    title     = {{FlashMesh}: Faster and Better Autoregressive Mesh Synthesis via Structured Speculation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27052--27061}
}
SeeThrough3D: Occlusion Aware 3D Control in Text-to-Image Generation: Vaibhav Agrawal,

Rishubh Parihar,

Pradhaan S Bhat,

Ravi Kiran Sarvadevabhatla,

Venkatesh Babu Radhakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agrawal_2026_CVPR,
    author    = {Agrawal, Vaibhav and Parihar, Rishubh and Bhat, Pradhaan S and Sarvadevabhatla, Ravi Kiran and Radhakrishnan, Venkatesh Babu},
    title     = {{SeeThrough3D}: Occlusion Aware {3D} Control in Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25403--25414}
}
Exemplar-Free Class Incremental Learning via Preserving Class-Discriminative Structure: Xin Zhang,

Liang Bai,

Guanchao Wang,

Xian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xin and Bai, Liang and Wang, Guanchao and Yang, Xian},
    title     = {Exemplar-Free Class Incremental Learning via Preserving Class-Discriminative Structure},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17979--17988}
}
Do You See What I Am Pointing At? Gesture-Based Egocentric Video Question Answering: Yura Choi,

Roy Miles,

Rolandos Alexandros Potamias,

Ismail Elezi,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Yura and Miles, Roy and Potamias, Rolandos Alexandros and Elezi, Ismail and Deng, Jiankang and Zafeiriou, Stefanos},
    title     = {Do You See What {I} Am Pointing At? Gesture-Based Egocentric Video Question Answering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18534--18544}
}
3D Gaussian Splatting with Self-Constrained Priors for High Fidelity Surface Reconstruction: Takeshi Noda,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noda_2026_CVPR,
    author    = {Noda, Takeshi and Liu, Yu-Shen and Han, Zhizhong},
    title     = {{3D} {Gaussian} Splatting with Self-Constrained Priors for High Fidelity Surface Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26041--26051}
}
VideoSSR: Video Self-Supervised Reinforcement Learning: Zefeng He,

Xiaoye Qu,

Yafu Li,

Siyuan Huang,

Daizong Liu,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Zefeng and Qu, Xiaoye and Li, Yafu and Huang, Siyuan and Liu, Daizong and Cheng, Yu},
    title     = {{VideoSSR}: Video Self-Supervised Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26530--26540}
}
Generalizing Visual Geometry Priors to Sparse Gaussian Occupancy Prediction: Changqing Zhou,

Yueru Luo,

Changhao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Changqing and Luo, Yueru and Chen, Changhao},
    title     = {Generalizing Visual Geometry Priors to Sparse {Gaussian} Occupancy Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28578--28587}
}
Scan Clusters, Not Pixels: A Cluster-Centric Paradigm for Efficient Ultra-high-definition Image Restoration: Chen Wu,

Ling Wang,

Zhuoran Zheng,

Yuning Cui,

Zhixiong Yang,

Xiangyu Chen,

Yue Zhang,

Weidong Jiang,

Jingyuan Xia; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Chen and Wang, Ling and Zheng, Zhuoran and Cui, Yuning and Yang, Zhixiong and Chen, Xiangyu and Zhang, Yue and Jiang, Weidong and Xia, Jingyuan},
    title     = {Scan Clusters, Not Pixels: A Cluster-Centric Paradigm for Efficient Ultra-high-definition Image Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15528--15537}
}
SPARROW: Learning Spatial Precision and Temporal Referential Consistency in Pixel-Grounded Video MLLMs: Mohamad Alansari,

Naufal Suryanto,

Divya Velayudhan,

Sajid Javed,

Naoufel Werghi,

Muzammal Naseer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alansari_2026_CVPR,
    author    = {Alansari, Mohamad and Suryanto, Naufal and Velayudhan, Divya and Javed, Sajid and Werghi, Naoufel and Naseer, Muzammal},
    title     = {{SPARROW}: Learning Spatial Precision and Temporal Referential Consistency in Pixel-Grounded Video {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17545--17556}
}
Iterative Closed-Loop Motion Synthesis for Scaling the Capabilities of Humanoid Control: Weisheng Xu,

Qiwei Wu,

Jiaxi Zhang,

Jing Tan,

Yangfan Li,

Yuetong Fang,

Jiaqi Xiong,

Kai Wu,

Rong Ou,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Weisheng and Wu, Qiwei and Zhang, Jiaxi and Tan, Jing and Li, Yangfan and Fang, Yuetong and Xiong, Jiaqi and Wu, Kai and Ou, Rong and Xu, Renjing},
    title     = {Iterative Closed-Loop Motion Synthesis for Scaling the Capabilities of Humanoid Control},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16398--16407}
}
PhysGaia: A Physics-aware Benchmark with Multi-Body Interactions for Dynamic Novel View Synthesis: Mijeong Kim,

Gunhee Kim,

Jungyoon Choi,

Wonjae Roh,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Mijeong and Kim, Gunhee and Choi, Jungyoon and Roh, Wonjae and Han, Bohyung},
    title     = {{PhysGaia}: A Physics-aware Benchmark with Multi-Body Interactions for Dynamic Novel View Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22604--22614}
}
Ghost-FWL: A Large-Scale Full-Waveform LiDAR Dataset for Ghost Detection and Removal: Kazuma Ikeda,

Ryosei Hara,

Rokuto Nagata,

Ozora Sako,

Zihao Ding,

Takahiro Kado,

Ibuki Fujioka,

Taro Beppu,

Mariko Isogawa,

Kentaro Yoshioka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ikeda_2026_CVPR,
    author    = {Ikeda, Kazuma and Hara, Ryosei and Nagata, Rokuto and Sako, Ozora and Ding, Zihao and Kado, Takahiro and Fujioka, Ibuki and Beppu, Taro and Isogawa, Mariko and Yoshioka, Kentaro},
    title     = {{Ghost-FWL}: A Large-Scale Full-Waveform {LiDAR} Dataset for Ghost Detection and Removal},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17164--17173}
}
Detecting Unknown Objects via Energy-based Separation for Open World Object Detection: Jun-Woo Heo,

Keonhee Park,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR,
    author    = {Heo, Jun-Woo and Park, Keonhee and Park, Gyeong-Moon},
    title     = {Detecting Unknown Objects via Energy-based Separation for Open World Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27558--27567}
}
Neural-Centric Video Processing Pipeline for Unified Multi-Task Inference: Seyeon Lee,

Juncheol Ye,

Jaehong Kim,

Dongsu Han; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Seyeon and Ye, Juncheol and Kim, Jaehong and Han, Dongsu},
    title     = {Neural-Centric Video Processing Pipeline for Unified Multi-Task Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18555--18564}
}
Scalable Multi-View Subspace Clustering with Tensorized Anchor Guidance: Miao Jia,

Xingchen Hu,

Jiyuan Liu,

Siwei Wang,

Min Wang,

Zijian Chen; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Miao and Hu, Xingchen and Liu, Jiyuan and Wang, Siwei and Wang, Min and Chen, Zijian},
    title     = {Scalable Multi-View Subspace Clustering with Tensorized Anchor Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14367--14376}
}
Image-Guided Geometric Stylization of 3D Meshes: Changwoon Choi,

Hyunsoo Lee,

Clément Jambon,

Yael Vinker,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Changwoon and Lee, Hyunsoo and Jambon, Cl{\'e}ment and Vinker, Yael and Kim, Young Min},
    title     = {Image-Guided Geometric Stylization of {3D} Meshes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19972--19981}
}
UCAN: Unified Convolutional Attention Network for Expansive Receptive Fields in Lightweight Super-Resolution: Cao Thien Tan,

Phan Thi Thu Trang,

Do Nghiem Duc,

Ho Ngoc Anh,

Hanyang Zhuang,

Nguyen Duc Dung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Cao Thien and Trang, Phan Thi Thu and Duc, Do Nghiem and Anh, Ho Ngoc and Zhuang, Hanyang and Dung, Nguyen Duc},
    title     = {{UCAN}: Unified Convolutional Attention Network for Expansive Receptive Fields in Lightweight Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23409--23418}
}
Splat-Based Metal Artifact Reduction in Cone-Beam CT via Compact Attenuation Modeling: Kiseok Choi,

Jaemin Cho,

Inchul Kim,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Kiseok and Cho, Jaemin and Kim, Inchul and Kim, Min H.},
    title     = {Splat-Based Metal Artifact Reduction in Cone-Beam {CT} via Compact Attenuation Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26751--26760}
}
TRANSPORTER: Transferring Visual Semantics from VLM Manifolds: Alexandros Stergiou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stergiou_2026_CVPR,
    author    = {Stergiou, Alexandros},
    title     = {{TRANSPORTER}: Transferring Visual Semantics from {VLM} Manifolds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24129--24140}
}
PerformRecast: Expression and Head Pose Disentanglement for Portrait Video Editing: Jiadong Liang,

Bojun Xiong,

Jie Tian,

Hua Li,

Xiao Long,

Yong Zheng,

Huan Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Jiadong and Xiong, Bojun and Tian, Jie and Li, Hua and Long, Xiao and Zheng, Yong and Fu, Huan},
    title     = {{PerformRecast}: Expression and Head Pose Disentanglement for Portrait Video Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25131--25141}
}
Zero-Shot Reconstruction of Animatable 3D Avatars with Cloth Dynamics from a Single Image: Joohyun Kwon,

Geonhee Sim,

Gyeongsik Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
    author    = {Kwon, Joohyun and Sim, Geonhee and Moon, Gyeongsik},
    title     = {Zero-Shot Reconstruction of Animatable {3D} Avatars with Cloth Dynamics from a Single Image},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18182--18192}
}
Learning Latent Proxies for Controllable Single-Image Relighting: Haoze Zheng,

Zihao Wang,

Xianfeng Wu,

Yajing Bai,

Yexin Liu,

Yun Li,

Xiaogang Xu,

Harry Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Haoze and Wang, Zihao and Wu, Xianfeng and Bai, Yajing and Liu, Yexin and Li, Yun and Xu, Xiaogang and Yang, Harry},
    title     = {Learning Latent Proxies for Controllable Single-Image Relighting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27303--27312}
}
ChangeBridge: Spatiotemporal Image Generation with Multimodal Controls for Remote Sensing: Zhenghui Zhao,

Chen Wu,

Xiangyong Cao,

Di Wang,

Hongruixuan Chen,

Datao Tang,

Liangpei Zhang,

Zhuo Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Zhenghui and Wu, Chen and Cao, Xiangyong and Wang, Di and Chen, Hongruixuan and Tang, Datao and Zhang, Liangpei and Zheng, Zhuo},
    title     = {{ChangeBridge}: Spatiotemporal Image Generation with Multimodal Controls for Remote Sensing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27761--27771}
}
AcTTA: Rethinking Test-Time Adaptation via Dynamic Activation: Hyeongyu Kim,

Geonhui Han,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Hyeongyu and Han, Geonhui and Hwang, Dosik},
    title     = {{AcTTA}: Rethinking Test-Time Adaptation via Dynamic Activation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22217--22226}
}
SAGA: Source Attribution of Generative AI Videos: Rohit Kundu,

Vishal Mohanty,

Hao Xiong,

Shan Jia,

Athula Balachandran,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kundu_2026_CVPR,
    author    = {Kundu, Rohit and Mohanty, Vishal and Xiong, Hao and Jia, Shan and Balachandran, Athula and Roy-Chowdhury, Amit K.},
    title     = {{SAGA}: Source Attribution of Generative {AI} Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21273--21283}
}
Enhancing Continual Learning of Vision-Language Models via Dynamic Prefix Weighting: Hyeonseo Jang,

Hyuk Kwon,

Kibok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
    author    = {Jang, Hyeonseo and Kwon, Hyuk and Lee, Kibok},
    title     = {Enhancing Continual Learning of Vision-Language Models via Dynamic Prefix Weighting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18042--18052}
}
Temporal Inversion for Learning Interval Change in Chest X-Rays: Hanbin Ko,

Kyeongmin Jeon,

Doowoong Choi,

Chang Min Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2026_CVPR,
    author    = {Ko, Hanbin and Jeon, Kyeongmin and Choi, Doowoong and Park, Chang Min},
    title     = {Temporal Inversion for Learning Interval Change in Chest {X-Rays}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28338--28347}
}
MoVie: Broaden Your Views with Human Motion for Action Detection: Di Yang,

Mahmoud Ali,

Xuanlong Yu,

Xi Shen,

Quan Kong,

Gianpiero Francesca,

François Brémond; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Di and Ali, Mahmoud and Yu, Xuanlong and Shen, Xi and Kong, Quan and Francesca, Gianpiero and Br{\'e}mond, Fran{\c{c}}ois},
    title     = {{MoVie}: Broaden Your Views with Human Motion for Action Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27313--27323}
}
CodeDance: A Dynamic Tool-integrated MLLM for Executable Visual Reasoning: Qi Song,

Honglin Li,

Yingchen Yu,

Haoyi Zhou,

Lin Yang,

Song Bai,

Qi She,

Zilong Huang,

Yunqing Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Qi and Li, Honglin and Yu, Yingchen and Zhou, Haoyi and Yang, Lin and Bai, Song and She, Qi and Huang, Zilong and Zhao, Yunqing},
    title     = {{CodeDance}: A Dynamic Tool-integrated {MLLM} for Executable Visual Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19186--19195}
}
SpatialTree: How Spatial Intelligence Branches Out in MLLMs: Yuxi Xiao,

Longfei Li,

Shen Yan,

Xinhang Liu,

Sida Peng,

Yunchao Wei,

Xiaowei Zhou,

Bingyi Kang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
    author    = {Xiao, Yuxi and Li, Longfei and Yan, Shen and Liu, Xinhang and Peng, Sida and Wei, Yunchao and Zhou, Xiaowei and Kang, Bingyi},
    title     = {{SpatialTree}: How Spatial Intelligence Branches Out in {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16701--16711}
}
Prototypical Action Reasoning Facilitated by Vision-Language Alignment for Egocentric Action Anticipation: Jiang Shao,

Xinbo Zhao,

Wenyin Tuo,

Xiaochun Zou; [pdf]
[bibtex]
@InProceedings{Shao_2026_CVPR,
    author    = {Shao, Jiang and Zhao, Xinbo and Tuo, Wenyin and Zou, Xiaochun},
    title     = {Prototypical Action Reasoning Facilitated by Vision-Language Alignment for Egocentric Action Anticipation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24000--24009}
}
PECCVAI: Overcoming the Brittleness of AI Image Watermarking Under Visual Paraphrasing Attacks: Shreyas Dixit,

Ashhar Aziz,

Shashwat Bajpai,

Vasu Sharma,

Aman Chadha,

Vinija Jain,

Amitava Das; [pdf] [supp]
[bibtex]
@InProceedings{Dixit_2026_CVPR,
    author    = {Dixit, Shreyas and Aziz, Ashhar and Bajpai, Shashwat and Sharma, Vasu and Chadha, Aman and Jain, Vinija and Das, Amitava},
    title     = {{PECCVAI}: Overcoming the Brittleness of {AI} Image Watermarking Under Visual Paraphrasing Attacks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24471--24480}
}
D^3FER: Dual Channel and Dual Branch Network for Robust Facial Expression Recognition under Dual Challenges: Hui Tang,

Yifan He,

Zhong Jin; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Hui and He, Yifan and Jin, Zhong},
    title     = {{D$^{3}$FER}: Dual Channel and Dual Branch Network for Robust Facial Expression Recognition under Dual Challenges},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18085--18095}
}
SAME: Sparse and Anchored Model Editing for Heterogeneous Incremental Learning under Limited Data: Zixuan Duan,

Zeyu Zhang,

Fengyuan Lu,

Shaofeng Zhang,

Wenbin Li,

Qi Fan,

Yang Gao; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
    author    = {Duan, Zixuan and Zhang, Zeyu and Lu, Fengyuan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang},
    title     = {{SAME}: Sparse and Anchored Model Editing for Heterogeneous Incremental Learning under Limited Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25055--25065}
}
OptiMVMap: Offline Vectorized Map Construction via Optimal Multi-vehicle Perspectives: Zedong Dan,

Zijie Wang,

Wei Zhang,

Xiangru Lin,

Weiming Zhang,

Xiao Tan,

Jingdong Wang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dan_2026_CVPR,
  author    = {Dan, Zedong and Wang, Zijie and Zhang, Wei and Lin, Xiangru and Zhang, Weiming and Tan, Xiao and Wang, Jingdong and Lin, Liang and Li, Guanbin},
  title     = {{OptiMVMap}: Offline Vectorized Map Construction via Optimal Multi-vehicle Perspectives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18140--18149},
}
Human-Centric Multi-Exposure Fusion: Benchmark and Bi-level Cognition Distillation Framework: Jingjie Shang,

Tengyu Ma,

Heng Zhang,

Jinyuan Liu,

Risheng Liu,

Yuan Wang,

Xiaochen Bo; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2026_CVPR,
  author    = {Shang, Jingjie and Ma, Tengyu and Zhang, Heng and Liu, Jinyuan and Liu, Risheng and Wang, Yuan and Bo, Xiaochen},
  title     = {Human-Centric Multi-Exposure Fusion: Benchmark and Bi-level Cognition Distillation Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26572--26581},
}
DC-Merge: Improving Model Merging with Directional Consistency: Han-Chen Zhang,

Zi-Hao Zhou,

Mao-Lin Luo,

Shimin Di,

Min-Ling Zhang,

Tong Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Han-Chen and Zhou, Zi-Hao and Luo, Mao-Lin and Di, Shimin and Zhang, Min-Ling and Wei, Tong},
  title     = {{DC-Merge}: Improving Model Merging with Directional Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22248--22258},
}
Random Wins All: Rethinking Grouping Strategies for Vision Tokens: Qihang Fan,

Yuang Ai,

Huaibo Huang,

Ran He; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Qihang and Ai, Yuang and Huang, Huaibo and He, Ran},
  title     = {Random Wins All: Rethinking Grouping Strategies for Vision Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27357--27366},
}
When Robots Obey the Patch: Universal Transferable Patch Attacks on Vision-Language-Action Models: Hui Lu,

Yi Yu,

Yiming Yang,

Chenyu Yi,

Qixin Zhang,

Bingquan Shen,

Alex C. Kot,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Hui and Yu, Yi and Yang, Yiming and Yi, Chenyu and Zhang, Qixin and Shen, Bingquan and Kot, Alex C. and Jiang, Xudong},
  title     = {When Robots Obey the Patch: Universal Transferable Patch Attacks on Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22867--22878},
}
MFEN: Multi-Frequency Expert Network for Visible-Infrared Person Re-ID: Xulin Li,

Yan Lu,

Bin Liu,

Qinhong Yang,

Qi Chu,

Tao Gong,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xulin and Lu, Yan and Liu, Bin and Yang, Qinhong and Chu, Qi and Gong, Tao and Yu, Nenghai},
  title     = {{MFEN}: Multi-Frequency Expert Network for Visible-Infrared Person {Re-ID}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18471--18480},
}
From Softmax to Dirichlet: Evidential Learning for Semi-supervised Semantic Segmentation: Huayu Mai,

Rui Sun,

Yujia Chen,

Wangkai Li,

Bingzhou Wang,

Aibing Li,

Zhangyu He,

Yuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Huayu and Sun, Rui and Chen, Yujia and Li, Wangkai and Wang, Bingzhou and Li, Aibing and He, Zhangyu and Wang, Yuan},
  title     = {From Softmax to {Dirichlet}: Evidential Learning for Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27697--27707},
}
Principled Steering via Null-space Projection for Jailbreak Defense in Vision-Language Models: Xingyu Zhu,

Beier Zhu,

Shuo Wang,

Junfeng Fang,

Kesen Zhao,

Hanwang Zhang,

Xiangnan He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xingyu and Zhu, Beier and Wang, Shuo and Fang, Junfeng and Zhao, Kesen and Zhang, Hanwang and He, Xiangnan},
  title     = {Principled Steering via Null-space Projection for Jailbreak Defense in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22889--22899},
}
DiT-IC: Aligned Diffusion Transformer for Efficient Image Compression: Junqi Shi,

Ming Lu,

Xingchen Li,

Anle Ke,

Ruiqi Zhang,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Junqi and Lu, Ming and Li, Xingchen and Ke, Anle and Zhang, Ruiqi and Ma, Zhan},
  title     = {{DiT-IC}: Aligned Diffusion Transformer for Efficient Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25719--25729},
}
TokenGS: Decoupling 3D Gaussian Prediction from Pixels with Learnable Tokens: Jiawei Ren,

Michal Jan Tyszkiewicz,

Jiahui Huang,

Zan Gojcic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Jiawei and Tyszkiewicz, Michal Jan and Huang, Jiahui and Gojcic, Zan},
  title     = {{TokenGS}: Decoupling {3D} {Gaussian} Prediction from Pixels with Learnable Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15365--15375},
}
CGHair: Compact Gaussian Hair Reconstruction with Card Clustering: Haimin Luo,

Srinjay Sarkar,

Albert Mosella-Montoro,

Francisco Vicente Carrasco,

Fernando De la Torre; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Haimin and Sarkar, Srinjay and Mosella-Montoro, Albert and Carrasco, Francisco Vicente and {De la Torre}, Fernando},
  title     = {{CGHair}: Compact {Gaussian} Hair Reconstruction with Card Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25225--25235},
}; Back